diff --git a/.gitmodules b/.gitmodules index 087be7d8..963c6d34 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "unity/source"] path = machine_interface/c_machine_libraries/unity/source - url = https://github.com/ThrowTheSwitch/Unity.git + url = git@github.com:ThrowTheSwitch/Unity.git [submodule "http"] path = net/http - url = https://github.com/cesanta/mongoose.git + url = git@github.com:cesanta/mongoose.git diff --git a/README.md b/README.md index aff73a67..9e673a36 100644 --- a/README.md +++ b/README.md @@ -114,3 +114,27 @@ If you use Dandelion, please cite our paper: # C Dependencies For testing the C code to interact with Cheri we are using unity which is included directly in the project. + +## GPU worker build + +The `gpu_worker` binary required by the `gpu_process` is assumed to be present in the corresponding `target` directory: +``` +cargo build --bin gpu_worker --features $(gpu-arch),gpu_process --target $(arch)-unknown-linux-gnu [--release] +``` + +Where `gpu-arch` is either `cuda` or `hip`. + +Also make sure that shared memory objects are executable: +``` +sudo mount -o remount,exec /dev/shm +``` + +### GPU worker path + +To use a `gpu_worker` that is not at the original location it was built in, set the `GPU_WORKER_PATH` environment variable to point to the desired binary. + +## GPU engine library path +`DANDELION_LIBRARY_PATH` overrides the directory where the GPU engines will look for kernel libraries. If the variable is unset, the engines will look in `machine_interface/tests/libs/`. + +## GPU Allocations +To prevent memory leakage, GPU kernels are disallowed from calling `malloc()`. All the memory a kernel requires should be specified in the respective config file. 
diff --git a/dandelion_commons/Cargo.toml b/dandelion_commons/Cargo.toml index 788db501..b56c4f22 100644 --- a/dandelion_commons/Cargo.toml +++ b/dandelion_commons/Cargo.toml @@ -5,6 +5,8 @@ edition = "2021" [features] timestamp = [] +reuse_weights = [] +auto_batching = [] [dependencies] hdrhist = "0.5.0" diff --git a/dandelion_commons/src/lib.rs b/dandelion_commons/src/lib.rs index 8968ca58..04af3412 100644 --- a/dandelion_commons/src/lib.rs +++ b/dandelion_commons/src/lib.rs @@ -114,6 +114,17 @@ pub enum DandelionError { OtherProctionError, /// Work queue from the dispatcher to the engines is full WorkQueueFull, + // GPU engine specific errors + /// error from HIP Runtime + HipError(String), + /// error from CUDA Runtime + CudaError(String), + /// identifier used in config file was not declared before + UndeclaredIdentifier(String), + /// argument given to the FromInput sizing was out of bounds + FromInputOutOfBounds, + /// could not deserialise JSON for config + ParsingJSONError(String), } // Implement display to be compliant with core::error::Error diff --git a/dandelion_commons/src/records.rs b/dandelion_commons/src/records.rs index 43dd2986..c79aa642 100644 --- a/dandelion_commons/src/records.rs +++ b/dandelion_commons/src/records.rs @@ -1,11 +1,12 @@ use crate::FunctionId; use core::fmt; +use std::sync::{Arc, Mutex}; use std::time::Instant; /// Maximum usize to expect when converting a record point to a usize /// By setting the last element to this explicitly, the compiler will throw an error, /// if there are more than this, because it enumerates from 0 and won't allow a number to be assigned twice. 
-const LAST_RECORD_POINT: usize = 17; +const LAST_RECORD_POINT: usize = 25; #[repr(usize)] #[derive(Clone, Copy, Debug, PartialEq)] @@ -44,6 +45,23 @@ pub enum RecordPoint { EngineStart, /// End execution of the function on the engine (sync) EngineEnd, + /// --- GPU --- + /// Start GPU inputs and buffers load (sync) + GPUTransferStart, + /// End GPU inputs and buffers load (sync) + GPUTransferEnd, + /// Start GPU kernel executions (sync) + GPUInferenceStart, + /// End GPU kernel executions (sync) + GPUInferenceEnd, + /// Start GPU output read (sync) + GPUOutputStart, + /// End GPU output read (sync) + GPUOutputEnd, + /// Start GPU output read (sync) + BatchAtomStart, + /// End GPU output read (sync) + BatchAtomEnd, /// Return from execution engine (async) FutureReturn = LAST_RECORD_POINT, } @@ -165,10 +183,102 @@ impl TimestampArchive { } } +#[cfg(feature = "reuse_weights")] +struct ReuseWeightsArchive { + collected_gpu_cache_hit: std::sync::Mutex>, + collected_gpu_id: std::sync::Mutex>, +} + +#[cfg(feature = "reuse_weights")] +impl ReuseWeightsArchive { + fn init() -> Self { + return Self { + collected_gpu_cache_hit: std::sync::Mutex::new(Vec::new()), + collected_gpu_id: std::sync::Mutex::new(Vec::new()), + }; + } + + fn insert(&self, new_gpu_cache_hit: bool, new_gpu_id: u8) { + let mut guard_cache = self.collected_gpu_cache_hit.lock().unwrap(); + guard_cache.push(new_gpu_cache_hit); + + let mut guard_gpu = self.collected_gpu_id.lock().unwrap(); + guard_gpu.push(new_gpu_id); + } + + fn reset(&self) { + let mut guard_cache = self.collected_gpu_cache_hit.lock().unwrap(); + *guard_cache = Vec::new(); + + let mut guard_gpu = self.collected_gpu_id.lock().unwrap(); + *guard_gpu = Vec::new(); + } + + fn append_gpu_info(&self, gpu_info: (bool, u8), summary: &mut String, indent: usize) { + // push self + summary.push_str(&format!( + "{}gpu_cache_hit:{}, gpu_id:{}", + "-".repeat(indent), + gpu_info.0, + gpu_info.1, + )); + } + + fn get_summary(&self, summary: &mut String) { 
+ for (gpu_cache_hit, gpu_id) in self.collected_gpu_cache_hit.lock().unwrap().iter().zip(self.collected_gpu_id.lock().unwrap().iter()) { + let gpu_info = (*gpu_cache_hit, *gpu_id); + self.append_gpu_info(gpu_info, summary, 0); + summary.push_str("\n"); + } + } +} + +#[cfg(feature = "auto_batching")] +struct BatchArchive { + collected_batch_size: std::sync::Mutex>, +} + +#[cfg(feature = "auto_batching")] +impl BatchArchive { + fn init() -> Self { + return Self { + collected_batch_size: std::sync::Mutex::new(Vec::new()), + }; + } + + fn insert(&self, new_batch_size: usize) { + let mut guard = self.collected_batch_size.lock().unwrap(); + guard.push(new_batch_size); + } + + fn reset(&self) { + let mut guard = self.collected_batch_size.lock().unwrap(); + *guard = Vec::new(); + } + + fn append_batch_size(&self, batch_size: usize, summary: &mut String, indent: usize) { + // push self + summary.push_str(&format!("{}batch_size:{}", "-".repeat(indent), batch_size)); + } + + fn get_summary(&self, summary: &mut String) { + for recorder in self.collected_batch_size.lock().unwrap().iter() { + self.append_batch_size(*recorder, summary, 0); + summary.push_str("\n"); + } + } +} + /// General implementation of recorder struct, additional functionality enabled by flags pub struct Recorder { #[cfg(feature = "timestamp")] timestamps: std::sync::Arc, + #[cfg(feature = "reuse_weights")] + gpu_cache_hit: Arc>, + #[cfg(feature = "reuse_weights")] + gpu_id: Arc>, + #[cfg(feature = "auto_batching")] + batch_size: Arc>, } impl Recorder { @@ -176,6 +286,12 @@ impl Recorder { return Self { #[cfg(feature = "timestamp")] timestamps: FunctionTimestamp::new(_function_id, _start), + #[cfg(feature = "reuse_weights")] + gpu_cache_hit: Arc::new(Mutex::new(false)), + #[cfg(feature = "reuse_weights")] + gpu_id: Arc::new(Mutex::new(u8::MAX)), + #[cfg(feature = "auto_batching")] + batch_size: Arc::new(Mutex::new(0)), }; } @@ -183,6 +299,12 @@ impl Recorder { return Self { #[cfg(feature = "timestamp")] 
timestamps: FunctionTimestamp::new(_function_id, _parent.timestamps.creation), + #[cfg(feature = "reuse_weights")] + gpu_cache_hit: Arc::new(Mutex::new(false)), + #[cfg(feature = "reuse_weights")] + gpu_id: Arc::new(Mutex::new(u8::MAX)), + #[cfg(feature = "auto_batching")] + batch_size: Arc::new(Mutex::new(0)), }; } @@ -191,6 +313,24 @@ impl Recorder { self.timestamps.record(_current_point); } + pub fn set_gpu_info(&mut self, _gpu_cache_hit: bool, _gpu_id: u8) { + #[cfg(feature = "reuse_weights")] + { + let mut gpu_cache_hit = self.gpu_cache_hit.lock().unwrap(); + *gpu_cache_hit = _gpu_cache_hit; + let mut gpu_id = self.gpu_id.lock().unwrap(); + *gpu_id = _gpu_id; + } + } + + pub fn set_batch_size(&mut self, _batch_size: usize) { + #[cfg(feature = "auto_batching")] + { + let mut batch_size = self.batch_size.lock().unwrap(); + *batch_size = _batch_size; + } + } + pub fn add_children(&mut self, _new_children: Vec) { #[cfg(feature = "timestamp")] for child in _new_children { @@ -202,6 +342,12 @@ impl Recorder { let recorder = Recorder { #[cfg(feature = "timestamp")] timestamps: self.timestamps.clone(), + #[cfg(feature = "reuse_weights")] + gpu_cache_hit: self.gpu_cache_hit.clone(), + #[cfg(feature = "reuse_weights")] + gpu_id: self.gpu_id.clone(), + #[cfg(feature = "auto_batching")] + batch_size: self.batch_size.clone(), }; return recorder; } @@ -218,6 +364,28 @@ impl fmt::Display for Recorder { } self.timestamps.fmt(_f)?; } + #[cfg(feature = "reuse_weights")] + { + if std::sync::Arc::strong_count(&self.gpu_cache_hit) != 1 + && std::sync::Arc::weak_count(&self.gpu_cache_hit) != 0 + { + panic!("Trying to format recorder that still has more than one reference"); + } + #[cfg(feature = "timestamp")] + write!(_f, ",")?; + write!(_f, " gpu_cache_hit: {}, gpu_id: {}", self.gpu_cache_hit.lock().unwrap(), self.gpu_id.lock().unwrap())?; + } + #[cfg(feature = "auto_batching")] + { + if std::sync::Arc::strong_count(&self.batch_size) != 1 + && 
std::sync::Arc::weak_count(&self.batch_size) != 0 + { + panic!("Trying to format recorder that still has more than one reference"); + } + #[cfg(feature = "timestamp")] + write!(_f, ",")?; + write!(_f, " batch_size: {}", self.batch_size.lock().unwrap())?; + } Ok(()) } } @@ -225,6 +393,10 @@ impl fmt::Display for Recorder { pub struct Archive { #[cfg(feature = "timestamp")] timestamp_archive: TimestampArchive, + #[cfg(feature = "reuse_weights")] + gpu_info_archive: ReuseWeightsArchive, + #[cfg(feature = "auto_batching")] + batch_archive: BatchArchive, } pub struct ArchiveInit { @@ -237,6 +409,10 @@ impl Archive { return Archive { #[cfg(feature = "timestamp")] timestamp_archive: TimestampArchive::init(), + #[cfg(feature = "reuse_weights")] + gpu_info_archive: ReuseWeightsArchive::init(), + #[cfg(feature = "auto_batching")] + batch_archive: BatchArchive::init(), }; } @@ -244,6 +420,15 @@ impl Archive { #[cfg(feature = "timestamp")] self.timestamp_archive .insert(std::sync::Arc::into_inner(_recorder.timestamps).unwrap()); + #[cfg(feature = "reuse_weights")] + self.gpu_info_archive.insert( + std::sync::Arc::into_inner((*_recorder.gpu_cache_hit.lock().unwrap()).into()).unwrap(), + std::sync::Arc::into_inner((*_recorder.gpu_id.lock().unwrap()).into()).unwrap(), + ); + #[cfg(feature = "auto_batching")] + self.batch_archive.insert( + std::sync::Arc::into_inner((*_recorder.batch_size.lock().unwrap()).into()).unwrap(), + ); } pub fn get_summary(&self) -> String { @@ -252,11 +437,20 @@ impl Archive { let mut summary = String::new(); #[cfg(feature = "timestamp")] self.timestamp_archive.get_summary(&mut summary); + #[cfg(feature = "reuse_weights")] + self.gpu_info_archive.get_summary(&mut summary); + #[cfg(feature = "auto_batching")] + self.batch_archive.get_summary(&mut summary); + println!("{}", summary); return summary; } pub fn reset(&self) { #[cfg(feature = "timestamp")] self.timestamp_archive.reset(); + #[cfg(feature = "reuse_weights")] + self.gpu_info_archive.reset(); + 
#[cfg(feature = "auto_batching")] + self.batch_archive.reset(); } } diff --git a/dispatcher/Cargo.toml b/dispatcher/Cargo.toml index 77a5f77b..7985eb97 100644 --- a/dispatcher/Cargo.toml +++ b/dispatcher/Cargo.toml @@ -22,4 +22,7 @@ wasm = ["machine_interface/wasm"] mmu = ["machine_interface/mmu"] kvm = ["machine_interface/kvm"] reqwest_io = ["machine_interface/reqwest_io"] -timestamp = ["dandelion_commons/timestamp"] \ No newline at end of file +timestamp = ["dandelion_commons/timestamp"] +gpu = ["machine_interface/gpu"] +gpu_queue = [] +auto_batching = [] diff --git a/dispatcher/src/composition.rs b/dispatcher/src/composition.rs index 4bf35d74..0fe87fcf 100644 --- a/dispatcher/src/composition.rs +++ b/dispatcher/src/composition.rs @@ -400,6 +400,28 @@ impl From<(usize, Vec>)> for CompositionSet { } } +// TODO : is there a better way? +#[cfg(feature = "auto_batching")] +use machine_interface::function_driver::AtomInputs; +#[cfg(feature = "auto_batching")] +impl Into for CompositionSet { + fn into(self) -> AtomInputs { + AtomInputs { + item_list: self.item_list.clone(), + set_index: self.set_index.clone(), + } + } +} +#[cfg(feature = "auto_batching")] +impl From for CompositionSet { + fn from(atom_inputs: AtomInputs) -> CompositionSet { + CompositionSet { + item_list: atom_inputs.item_list.clone(), + set_index: atom_inputs.set_index.clone(), + } + } +} + pub struct CompositionSetTransferIterator<'origin> { /// set for which this iterator is implemented set_iterator: std::slice::Iter<'origin, (u32, usize, Arc)>, diff --git a/dispatcher/src/dispatcher.rs b/dispatcher/src/dispatcher.rs index 6d59c3ec..4060739a 100644 --- a/dispatcher/src/dispatcher.rs +++ b/dispatcher/src/dispatcher.rs @@ -2,7 +2,7 @@ use crate::{ composition::{ get_sharding, Composition, CompositionSet, InputSetDescriptor, JoinStrategy, ShardingMode, }, - execution_qs::EngineQueue, + execution_qs::{EngineQueue, EngineQueueGPU}, function_registry::{FunctionRegistry, FunctionType, Metadata}, 
resource_pool::ResourcePool, }; @@ -19,35 +19,93 @@ use futures::{ use itertools::Itertools; use log::{debug, trace}; use machine_interface::{ - function_driver::{Driver, FunctionConfig, WorkToDo}, + function_driver::{Driver, FunctionConfig, WorkQueue, WorkToDo}, machine_config::{ - get_available_domains, get_available_drivers, get_compatibilty_table, DomainType, - EngineType, + get_available_domains, get_available_drivers, get_compatibilty_table, DomainType, EngineType }, memory_domain::{Context, MemoryDomain, MemoryResource}, + promise::Promise, }; use std::{ collections::{BTreeMap, BTreeSet}, sync::Arc, }; +#[cfg(feature = "auto_batching")] +use crate::execution_qs::BatchingQueue; +#[cfg(feature = "auto_batching")] +use machine_interface::function_driver::{AtomInputs, BatchInfo, WorkDone}; + #[derive(Debug, Clone)] pub enum DispatcherInput { None, Set(CompositionSet), } +pub trait EnqueueWork: Send + Sync { + /// Enqueued work: + /// - ParsingArguments (only on registration) + /// - LoadingArguments + /// - (multiple) TransferArguments + /// - FunctionArguments + fn enqueue_work( + &self, + args: WorkToDo, + function_id: FunctionId, + #[cfg(feature = "auto_batching")] gpu_id: u8, + ) -> DandelionResult; +} + +pub trait FullQueue: EnqueueWork + WorkQueue { + fn clone_full_queue(&self) -> Box; + fn upcast_work_queue(&self) -> Box<(dyn WorkQueue + Send + Sync)>; + fn upcast_enqueue_work(&self) -> Box<(dyn EnqueueWork)>; +} + +impl Clone for Box { + fn clone(&self) -> Box { + self.clone_full_queue() + } +} + +impl FullQueue for T +where + T: EnqueueWork + WorkQueue + Send + Sync + Clone + 'static, +{ + fn clone_full_queue(&self) -> Box { + Box::new(self.clone()) + } + + fn upcast_work_queue(&self) -> Box<(dyn WorkQueue + Send + Sync)> { + Box::new(self.clone()) + } + + fn upcast_enqueue_work(&self) -> Box<(dyn EnqueueWork)> { + Box::new(self.clone()) + } +} + // TODO here and in registry can probably replace driver and loader function maps with fixed size arrays // 
That have compile time size and static indexing // TODO also here and in registry replace Arc Box with static references from leaked boxes for things we expect to be there for // the entire execution time anyway pub struct Dispatcher { - domains: BTreeMap>, Box)>, - engine_queues: BTreeMap>, + domains: BTreeMap>, Box)>, + engine_queues: BTreeMap>, type_map: BTreeMap, function_registry: FunctionRegistry, } +fn get_queue_from_engine(engine_type: EngineType) -> Box { + return match engine_type { + #[cfg(all(feature = "gpu", feature = "auto_batching"))] + EngineType::GpuThread => Box::new(BatchingQueue::new()), + #[cfg(all(feature = "gpu", feature = "gpu_queue"))] + EngineType::GpuThread => Box::new(EngineQueueGPU::new()), + _ => Box::new(EngineQueue::new()), + }; +} + impl Dispatcher { pub fn init( mut resource_pool: ResourcePool, @@ -61,17 +119,17 @@ impl Dispatcher { // Insert a work queue for each domain and use up all engine resource available let mut domain_map = BTreeMap::new(); let mut engine_queues = BTreeMap::new(); - let mut registry_drivers: BTreeMap)> = + let mut registry_drivers: BTreeMap)> = BTreeMap::new(); for (engine_type, driver) in drivers.into_iter() { - let work_queue = Box::new(EngineQueue::new()); + let work_queue = get_queue_from_engine(engine_type); while let Ok(Some(resource)) = resource_pool.sync_acquire_engine_resource(engine_type) { - driver.start_engine(resource, work_queue.clone())?; + driver.start_engine(resource, work_queue.upcast_work_queue())?; } let domain_type = type_map.get(&engine_type).unwrap(); let domain = domains.get(domain_type).unwrap().clone(); - domain_map.insert(*domain_type, (domain, work_queue.clone())); - engine_queues.insert(engine_type, work_queue.clone()); + domain_map.insert(*domain_type, (domain, work_queue.upcast_enqueue_work())); + engine_queues.insert(engine_type, work_queue.upcast_enqueue_work()); registry_drivers.insert( engine_type, (driver as &'static dyn Driver, work_queue.clone()), @@ -442,48 +500,173 @@ 
impl Dispatcher { if let Some(alternative) = options.iter().next() { match &alternative.function_type { FunctionType::Function(engine_id, ctx_size) => { - recorder.record(RecordPoint::PrepareEnvQueue); - let (context, config, metadata) = self - .prepare_for_engine( - function_id, - *engine_id, - inputs, - *ctx_size, - non_caching, - recorder.get_sub_recorder(), - ) - .await?; - recorder.record(RecordPoint::GetEngineQueue); - trace!("running function {} on {:?} type engine with input sets {:?} and output sets {:?}", - function_id, - *engine_id, - metadata.input_sets.iter().map(|(name, _)| name).collect_vec(), - metadata.output_sets); - let context = self - .run_on_engine( - *engine_id, - config, - metadata.output_sets, - context, - recorder.get_sub_recorder(), - ) - .await?; - let context_arc = Arc::new(context); - - let composition_sets = context_arc - .content - .iter() - .enumerate() - .map(|(function_set_id, data_option)| { - data_option.as_ref().and_then(|_| { - Some(CompositionSet::from(( - function_set_id, - vec![context_arc.clone()], - ))) - }) - }) - .collect(); - return Ok(composition_sets); + match engine_id { + #[cfg(feature = "auto_batching")] + EngineType::GpuThread => { + let context_id = match self.type_map.get(engine_id) { + Some(id) => id, + None => return Err(DandelionError::Dispatcher(DispatcherError::ConfigError)), + }; + let (_, transfer_queue) = match self.domains.get(context_id) { + Some(d) => d, + None => return Err(DandelionError::Dispatcher(DispatcherError::ConfigError)), + }; + + let mut atom_inputs = Vec::new(); + for input in inputs.clone() { + if input.is_some() { + let atom_input: AtomInputs = input.unwrap().into(); + atom_inputs.push(Some(atom_input)); + } else { + atom_inputs.push(None); + } + } + + let args = WorkToDo::BatchAtom { + function_id, + inputs: atom_inputs, + recorder: recorder.get_sub_recorder(), + inputs_vec: None, + children_debts: None, + gpu_id: None, + }; + + recorder.record(RecordPoint::BatchAtomStart); + let 
batch_info = transfer_queue.enqueue_work(args, function_id, u8::MAX)?.await?.get_shared_context(); + recorder.record(RecordPoint::BatchAtomEnd); + + let batch_pos = batch_info.batch_pos; + + let context_arc = if batch_pos == 0 { + let inputs_vec = batch_info.inputs_vec.unwrap(); + let gpu_id = batch_info.gpu_id.unwrap(); + + // variable inputs needs to contain ALL INPUTS, with indexed names: input0, input1, ... + let mut compositions_vec = Vec::new(); + for input in inputs_vec { + if input.is_some() { + let composition_set = CompositionSet::from(input.unwrap()); + compositions_vec.push(Some(composition_set)); + } else { + compositions_vec.push(None); + } + } + + recorder.record(RecordPoint::PrepareEnvQueue); + let (context, config, metadata) = self + .prepare_for_engine( + function_id, + *engine_id, + compositions_vec, + *ctx_size, + non_caching, + recorder.get_sub_recorder(), + gpu_id, + ) + .await?; + recorder.record(RecordPoint::GetEngineQueue); + + trace!("running function {} on {:?} type engine with input sets {:?} and output sets {:?}", + function_id, + *engine_id, + metadata.input_sets.iter().map(|(name, _)| name).collect_vec(), + metadata.output_sets); + + let context = self + .run_on_engine( + function_id, + *engine_id, + config, + metadata.output_sets, + context, + recorder.get_sub_recorder(), + gpu_id, + ) + .await?; + let context_arc = Arc::new(context); + + let mut debts = batch_info.children_debts.unwrap(); + for i in (1..=debts.len()).rev() { + let debt = debts.pop().unwrap(); + debt.fulfill(Ok(WorkDone::SharedContext(BatchInfo { + batch_pos: i, + inputs_vec: None, + context_arc: Some(context_arc.clone()), + children_debts: None, + gpu_id: None, + }))); + } + + context_arc + } else { + batch_info.context_arc.unwrap() + }; + + let composition_sets = context_arc + .content + .iter() + .enumerate() + .filter(|(function_set_id, _)| *function_set_id == batch_pos) + .map(|(function_set_id, data_option)| { + data_option.as_ref().and_then(|_| { + 
Some(CompositionSet::from(( + function_set_id, + vec![context_arc.clone()], + ))) + }) + }) + .collect(); + + return Ok(composition_sets); + }, + _ => { + recorder.record(RecordPoint::PrepareEnvQueue); + let (context, config, metadata) = self + .prepare_for_engine( + function_id, + *engine_id, + inputs, + *ctx_size, + non_caching, + recorder.get_sub_recorder(), + #[cfg(feature = "auto_batching")] u8::MAX, + ) + .await?; + recorder.record(RecordPoint::GetEngineQueue); + trace!("running function {} on {:?} type engine with input sets {:?} and output sets {:?}", + function_id, + *engine_id, + metadata.input_sets.iter().map(|(name, _)| name).collect_vec(), + metadata.output_sets); + let context = self + .run_on_engine( + function_id, + *engine_id, + config, + metadata.output_sets, + context, + recorder.get_sub_recorder(), + #[cfg(feature = "auto_batching")] u8::MAX, + ) + .await?; + let context_arc = Arc::new(context); + + let composition_sets = context_arc + .content + .iter() + .enumerate() + .map(|(function_set_id, data_option)| { + data_option.as_ref().and_then(|_| { + Some(CompositionSet::from(( + function_set_id, + vec![context_arc.clone()], + ))) + }) + }) + .collect(); + return Ok(composition_sets); + } + } } FunctionType::Composition(composition) => { return self @@ -507,6 +690,7 @@ impl Dispatcher { ctx_size: usize, non_caching: bool, mut recorder: Recorder, + #[cfg(feature = "auto_batching")] gpu_id: u8, ) -> DandelionResult<(Context, FunctionConfig, Metadata)> { debug!("Preparing function {} for engine", function_id); let metadata = self.function_registry.get_metadata(function_id).await?; @@ -529,8 +713,10 @@ impl Dispatcher { ctx_size, non_caching, recorder.get_sub_recorder(), + #[cfg(feature = "auto_batching")] gpu_id, ) .await?; + recorder.record(RecordPoint::TransferQueue); // make sure all input sets are there at the correct index let mut static_sets = BTreeSet::new(); for (function_set_index, (in_set_name, metadata_set)) in @@ -562,9 +748,11 @@ impl 
Dispatcher { source_item_index: item, recorder: recorder.get_sub_recorder(), }; - recorder.record(RecordPoint::TransferQueue); - function_context = transfer_queue.enqueu_work(args).await?.get_context(); - recorder.record(RecordPoint::TransferDequeue); + function_context = transfer_queue.enqueue_work( + args, + function_id, + #[cfg(feature = "auto_batching")] gpu_id, + )?.await?.get_context(); function_buffer += 1; } } @@ -608,22 +796,27 @@ impl Dispatcher { source_item_index: item, recorder: recorder.get_sub_recorder(), }; - recorder.record(RecordPoint::TransferQueue); - function_context = transfer_queue.enqueu_work(args).await?.get_context(); - recorder.record(RecordPoint::TransferDequeue); + function_context = transfer_queue.enqueue_work( + args, + function_id, + #[cfg(feature = "auto_batching")] gpu_id, + )?.await?.get_context(); function_item += 1; } } + recorder.record(RecordPoint::TransferDequeue); return Ok((function_context, function_config, metadata)); } async fn run_on_engine( &self, + function_id: FunctionId, engine_type: EngineType, function_config: FunctionConfig, output_sets: Arc>, function_context: Context, mut recorder: Recorder, + #[cfg(feature = "auto_batching")] gpu_id: u8, ) -> DandelionResult { // preparation is done, get engine to receive engine debug!( @@ -642,7 +835,11 @@ impl Dispatcher { recorder: subrecoder, }; recorder.record(RecordPoint::ExecutionQueue); - let result = engine_queue.enqueu_work(args).await?.get_context(); + let result = engine_queue.enqueue_work( + args, + function_id, + #[cfg(feature = "auto_batching")] gpu_id, + )?.await?.get_context(); recorder.record(RecordPoint::FutureReturn); return Ok(result); } diff --git a/dispatcher/src/execution_qs.rs b/dispatcher/src/execution_qs.rs index e6ff3adb..8803d91e 100644 --- a/dispatcher/src/execution_qs.rs +++ b/dispatcher/src/execution_qs.rs @@ -1,14 +1,30 @@ +use crate::dispatcher::{EnqueueWork, FullQueue}; use core::sync::atomic::{AtomicUsize, Ordering}; use 
crossbeam::channel::{TryRecvError, TrySendError}; -use dandelion_commons::{DandelionError, DandelionResult}; +use dandelion_commons::{DandelionError, DandelionResult, FunctionId}; use log::error; use machine_interface::{ - function_driver::{WorkDone, WorkQueue, WorkToDo}, - promise::{Debt, PromiseBuffer}, + function_driver::{WorkQueue, WorkToDo}, + promise::{Debt, Promise, PromiseBuffer}, +}; +use std::{ + cell::Cell, + cmp, + collections::{HashMap, VecDeque}, + env, + hint, + sync::{Arc, Mutex}, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, }; -use std::{hint, sync::Arc}; -const MAX_QUEUE: usize = 4096; +const MAX_QUEUE: usize = 16384; +const GPU_NUMBER: usize = 4; +const FRONT_QUEUE: usize = 10; // Specify how many requests can be looked at each polling iteration; limit it to lower polling time +const MAX_QUEUE_TIME: Duration = Duration::new(0, 30_000_000); // 30 ms +const MAX_IDLE_TIME: Duration = Duration::new(0, 5_000_000); // 5 ms +const IDLE_BEFORE_BATCH_FORCED: Duration = Duration::new(0, 5_000_000); // 5 ms +const SOFT_MIN_BATCHED: usize = 32; +const MAX_BATCHED: usize = 512; struct AtomicTickets { start: AtomicUsize, @@ -80,6 +96,28 @@ impl WorkQueue for EngineQueue { } } +impl EnqueueWork for EngineQueue { + fn enqueue_work( + &self, + args: WorkToDo, + function_id: FunctionId, + #[cfg(feature = "auto_batching")] gpu_id: u8, + ) -> DandelionResult { + let (promise, debt) = self.promise_buffer.get_promise()?; + match self.queue_in.try_send((args, debt)) { + Ok(()) => (), + Err(TrySendError::Disconnected(_)) => { + error!("Failed to enqueu work, workqueue has been disconnected") + } + Err(TrySendError::Full(_)) => return Err(DandelionError::WorkQueueFull), + } + return Ok(promise); + } +} + +unsafe impl Send for EngineQueue {} +unsafe impl Sync for EngineQueue {} + impl EngineQueue { pub fn new() -> Self { let (sender, receiver) = crossbeam::channel::bounded(MAX_QUEUE); @@ -95,16 +133,574 @@ impl EngineQueue { promise_buffer: 
PromiseBuffer::init(MAX_QUEUE), }; } +} + +pub struct EngineQueueGPU { + engine_counter: Cell, + engine_id: u8, + general_queue_in: crossbeam::channel::Sender<(WorkToDo, Debt)>, + general_queue_out: crossbeam::channel::Receiver<(WorkToDo, Debt)>, + function_queue: Arc>>, + last_function: Arc>>, + idle_since: Cell, + worker_queue: Arc, + promise_buffer: PromiseBuffer, +} + +impl WorkQueue for EngineQueueGPU { + fn get_engine_args(&self) -> (WorkToDo, Debt) { + self.idle_since.set(Instant::now()); + let work = loop { + // Make sure that engines acquire the lock in a round-robin fashion + let local_ticket = self.worker_queue.end.fetch_add(1, Ordering::AcqRel); + while local_ticket != self.worker_queue.start.load(Ordering::Acquire) { + hint::spin_loop(); + } + + // If some non-FunctionArguments is ready, run it + let _ = match self.general_queue_out.try_recv() { + Err(TryRecvError::Disconnected) => panic!("Work queue disconnected"), + Ok(recieved) => { + let (recieved_args, recevied_dept) = recieved; + if recevied_dept.is_alive() { + break (recieved_args, recevied_dept); + } else { + () + } + } + Err(TryRecvError::Empty) => (), + }; + // Else, try to run FunctionArguments WorkToDo + + let mut function_queue = self.function_queue.lock().unwrap(); + + let mut last_function = self.last_function.lock().unwrap(); + let last_function_id = last_function.get(&self.engine_id).unwrap(); + + if !function_queue.is_empty() { + let (first_timestamp, first_function_id, _, _) = &function_queue[0]; + + let now = Instant::now(); + let mut send_idx = usize::MAX; + + // Send first in the queue if: + if *last_function_id == u64::MAX { + // The engine hasn't run any function yet + send_idx = 0; + } else if now.duration_since(self.idle_since.get()) > MAX_IDLE_TIME { + // The engine has been idle for too much time + send_idx = 0; + } else if now.duration_since(*first_timestamp) > MAX_QUEUE_TIME { + // The first request has waited too long + send_idx = 0; + } + + if send_idx == usize::MAX { + 
let loop_max = std::cmp::min(FRONT_QUEUE, function_queue.len()); + // Send i-th request if: + for i in 0..loop_max { + let (_, function_id, _, _) = &function_queue[i]; + + // The i-th request is of the model loaded by the engine + if *last_function_id == *function_id { + send_idx = i; + break; + } + } + } + + // Send first request if: + // 1) no other request has been chosen + if send_idx == usize::MAX { + let mut someone_has_it = false; + for key in last_function.keys() { + if *key != self.engine_id { + let last_function_id = last_function.get(key).unwrap(); + if *last_function_id == *first_function_id { + someone_has_it = true; + break; + } + } + } + // 2) no other engine has the function loaded + if !someone_has_it { + send_idx = 0; + } + } + + // Actual send, if: + if send_idx != usize::MAX { + match function_queue.remove(send_idx) { + Some((_, function_id, args, debt)) => { + // The debt is still alive + if debt.is_alive() { + last_function.insert(self.engine_id, function_id); + break (args, debt); + } + } + None => (), + } + } + } + + // If no WorkToDo is selected at all, update the counter + self.worker_queue.start.fetch_add(1, Ordering::Release); + }; + // Exiting the loop, update the counter + self.worker_queue.start.fetch_add(1, Ordering::Release); + return work; + } + /// models called with inter-call times that are randomly selected, to "overlap differently" + + fn try_get_engine_args(&self) -> Option<(WorkToDo, Debt)> { + // TODO + return None; + } +} - pub async fn enqueu_work(&self, args: WorkToDo) -> DandelionResult { +impl EnqueueWork for EngineQueueGPU { + fn enqueue_work( + &self, + args: WorkToDo, + function_id: FunctionId, + #[cfg(feature = "auto_batching")] gpu_id: u8, + ) -> DandelionResult { let (promise, debt) = self.promise_buffer.get_promise()?; - match self.queue_in.try_send((args, debt)) { - Ok(()) => (), - Err(TrySendError::Disconnected(_)) => { - error!("Failed to enqueu work, workqueue has been disconnected") + + match args { + 
WorkToDo::FunctionArguments { .. } => { + let now = Instant::now(); + self.function_queue + .lock() + .unwrap() + .push_back((now, function_id, args, debt)); } - Err(TrySendError::Full(_)) => return Err(DandelionError::WorkQueueFull), + _ => match self.general_queue_in.try_send((args, debt)) { + Ok(()) => (), + Err(TrySendError::Disconnected(_)) => { + error!("Failed to enqueu work, workqueue has been disconnected") + } + Err(TrySendError::Full(_)) => return Err(DandelionError::WorkQueueFull), + }, + } + + return Ok(promise); + } +} + +unsafe impl Send for EngineQueueGPU {} +unsafe impl Sync for EngineQueueGPU {} + +impl EngineQueueGPU { + pub fn new() -> Self { + let engine_counter = Cell::new(0); + let engine_id = 0; + + let (sender, receiver) = crossbeam::channel::bounded(MAX_QUEUE); + + let function_queue = Arc::new(Mutex::new(VecDeque::with_capacity(MAX_QUEUE))); + + let last_function = Arc::new(Mutex::new(HashMap::new())); + + let idle_since = Cell::new(Instant::now()); + + let tickets = AtomicTickets { + start: AtomicUsize::new(0), + end: AtomicUsize::new(0), + }; + let worker_queue = Arc::new(tickets); + + let promise_buffer = PromiseBuffer::init(MAX_QUEUE); + + return EngineQueueGPU { + engine_counter, + engine_id, + general_queue_in: sender, + general_queue_out: receiver, + function_queue, + last_function, + idle_since, + worker_queue, + promise_buffer, + }; + } +} + +impl Clone for EngineQueueGPU { + fn clone(&self) -> EngineQueueGPU { + self.engine_counter.set(self.engine_counter.get() + 1); + { + let mut last_function = self.last_function.lock().unwrap(); + last_function.insert(self.engine_counter.get(), u64::MAX); + } + + EngineQueueGPU { + engine_counter: self.engine_counter.clone(), + engine_id: self.engine_counter.get(), + general_queue_in: self.general_queue_in.clone(), + general_queue_out: self.general_queue_out.clone(), + function_queue: self.function_queue.clone(), + idle_since: Cell::new(Instant::now()), + last_function: 
self.last_function.clone(), + worker_queue: self.worker_queue.clone(), + promise_buffer: self.promise_buffer.clone(), + } + } +} + +#[cfg(feature = "auto_batching")] +pub struct BatchingQueue { + engine_counter: Cell, + engine_id: u8, + general_queue_in: Vec>, + general_queue_out: Vec>, + atoms_times: Arc>>, + atoms_map: Arc>>>, + last_function: Arc>>, + idle_since: Cell, + worker_queue: Arc, + promise_buffer: PromiseBuffer, +} + +#[cfg(feature = "auto_batching")] +impl WorkQueue for BatchingQueue { + fn get_engine_args(&self) -> (WorkToDo, Debt) { + let FIXED_BATCH_SIZE: usize = match env::var("BATCH_SIZE") { + Ok(value) => value.parse::().unwrap(), + Err(e) => 2, + }; + + self.idle_since.set(Instant::now()); + let work = loop { + // Make sure that engines acquire the lock in a round-robin fashion + let local_ticket = self.worker_queue.end.fetch_add(1, Ordering::AcqRel); + while local_ticket != self.worker_queue.start.load(Ordering::Acquire) { + hint::spin_loop(); + } + + // If some non-FunctionArguments is ready, run it + let _ = match self.general_queue_out[self.engine_id as usize].try_recv() { + Err(TryRecvError::Disconnected) => panic!("Work queue disconnected"), + Ok(recieved) => { + let (recieved_args, recevied_dept) = recieved; + if recevied_dept.is_alive() { + break (recieved_args, recevied_dept); + } else { + () + } + } + Err(TryRecvError::Empty) => (), + }; + // Else, try to run BatchAtom WorkToDo + + let mut atoms_times = self.atoms_times.lock().unwrap(); + + let mut last_function = self.last_function.lock().unwrap(); + let last_function_id = last_function.get(&self.engine_id).unwrap(); + + let mut atoms_map = self.atoms_map.lock().unwrap(); + + if !atoms_times.is_empty() { + let (first_timestamp, first_function_id) = &atoms_times[0]; + + let now = Instant::now(); + let mut send_idx = usize::MAX; + let mut send_function_id = u64::MAX; + + // Send first in the queue if: + if *last_function_id == u64::MAX { + // The engine hasn't run any function yet + 
send_idx = 0; + send_function_id = *first_function_id; + } else if now.duration_since(self.idle_since.get()) > MAX_IDLE_TIME { + // The engine has been idle for too much time + send_idx = 0; + send_function_id = *first_function_id; + } else if now.duration_since(*first_timestamp) > MAX_QUEUE_TIME { + // The first request has waited too long + send_idx = 0; + send_function_id = *first_function_id; + } + + if send_idx == usize::MAX { + let loop_max = std::cmp::min(FRONT_QUEUE, atoms_times.len()); + // Send i-th request if: + for i in 0..loop_max { + let (_, function_id) = &atoms_times[i]; + + // The i-th request is of the model loaded by the engine + if *last_function_id == *function_id { + send_idx = i; + send_function_id = *function_id; + break; + } + } + } + + // Send first request if: + // 1) no other request has been chosen, AND + if send_idx == usize::MAX { + let mut someone_has_it = false; + for key in last_function.keys() { + if *key != self.engine_id { + let last_function_id = last_function.get(key).unwrap(); + if *last_function_id == *first_function_id { + someone_has_it = true; + break; + } + } + } + // 2) no other engine has the function loaded + if !someone_has_it { + send_idx = 0; + send_function_id = *first_function_id; + } + } + + // Skip if batch would be too small: + if send_function_id != u64::MAX { + let mut requests_list = atoms_map.get_mut(&send_function_id).unwrap(); + if self.idle_since.get().elapsed() < IDLE_BEFORE_BATCH_FORCED && requests_list.len() < SOFT_MIN_BATCHED { + send_function_id = u64::MAX; + } + } + + // Actual send, if: + if send_function_id != u64::MAX { + let mut requests_list = atoms_map.get_mut(&send_function_id).unwrap(); + let (mut head_args, head_debt) = requests_list.pop_front().unwrap(); + + if head_debt.is_alive() { + // Prepare the batch: + let mut batched = 1; + let mut tmp_inputs_vec = Vec::new(); + let mut call_children_debts = Vec::new(); + + // Copy inputs of first request + if let WorkToDo::BatchAtom { + ref 
inputs, + function_id: _, + recorder: _, + inputs_vec: _, + children_debts: _, + gpu_id: _, + } = head_args + { + for input in inputs { + tmp_inputs_vec.push(input.clone().unwrap()); + } + } + + // Copy inputs of other requests + let inner_loop_max = cmp::min(requests_list.len(), MAX_BATCHED - 1); + batched += inner_loop_max; + for i in 1..=inner_loop_max { + let (mut child_args, child_debt) = requests_list.pop_front().unwrap(); + if let WorkToDo::BatchAtom { + inputs, + function_id: _, + ref mut recorder, + inputs_vec: _, + children_debts: _, + gpu_id: _, + } = child_args + { + for (i, input) in inputs.into_iter().enumerate() { + tmp_inputs_vec[i].item_list.extend(input.unwrap().item_list); + } + call_children_debts.push(child_debt); + + // Record the size of the batch + recorder.set_batch_size(batched); + } + } + + // Record the size of the batch + if let WorkToDo::BatchAtom { + inputs: _, + function_id: _, + ref mut recorder, + inputs_vec: _, + children_debts: _, + gpu_id: _, + } = head_args + { + recorder.set_batch_size(batched); + } + + // Workaround to support batch size 1 + if batched == 1 { + let copy_first_input = tmp_inputs_vec[0].item_list[0].clone(); + tmp_inputs_vec[0].item_list = vec![copy_first_input; FIXED_BATCH_SIZE]; + } + + // Remove the batched requests from the time-based queue + let mut idx = 0; + let mut removed = 0; + while removed < batched { + let (_, function_id) = &atoms_times[idx]; + if *function_id == send_function_id { + atoms_times.remove(idx); + removed += 1; + } else { + idx += 1; + } + } + + let mut call_inputs_vec = Vec::new(); + for set in tmp_inputs_vec { + call_inputs_vec.push(Some(set)); + } + + // Make head request ready to run whole batch + if let WorkToDo::BatchAtom { + ref mut inputs_vec, + ref mut children_debts, + function_id: _, + inputs: _, + recorder: _, + ref mut gpu_id, + } = head_args + { + *inputs_vec = Some(call_inputs_vec); + *children_debts = Some(call_children_debts); + *gpu_id = Some(self.engine_id); + } + + 
// last_function.insert(self.engine_id, send_function_id); + break (head_args, head_debt); + } + } + } + + // If no WorkToDo is selected at all, update the counter + self.worker_queue.start.fetch_add(1, Ordering::Release); + }; + // Exiting the loop, update the counter + self.worker_queue.start.fetch_add(1, Ordering::Release); + return work; + } + /// models called with inter-call times that are randomly selected, to "overlap differently" + + fn try_get_engine_args(&self) -> Option<(WorkToDo, Debt)> { + // TODO + return None; + } +} + +#[cfg(feature = "auto_batching")] +impl EnqueueWork for BatchingQueue { + fn enqueue_work( + &self, + args: WorkToDo, + function_id: FunctionId, + gpu_id: u8, + ) -> DandelionResult { + let (promise, debt) = self.promise_buffer.get_promise()?; + + match args { + #[cfg(feature = "auto_batching")] + WorkToDo::BatchAtom { .. } => { + let now = Instant::now(); + + let mut atoms_times = self.atoms_times.lock().unwrap(); + atoms_times.push_back((now, function_id)); + + let mut atoms_map = self.atoms_map.lock().unwrap(); + atoms_map + .entry(function_id) + .or_insert(VecDeque::with_capacity(MAX_QUEUE)); + atoms_map + .get_mut(&function_id) + .unwrap() + .push_back((args, debt)); + } + _ => match self.general_queue_in[gpu_id as usize].try_send((args, debt)) { + Ok(()) => (), + Err(TrySendError::Disconnected(_)) => { + error!("Failed to enqueu work, workqueue has been disconnected") + } + Err(TrySendError::Full(_)) => return Err(DandelionError::WorkQueueFull), + }, + } + + return Ok(promise); + } +} + +#[cfg(feature = "auto_batching")] +unsafe impl Send for BatchingQueue {} +#[cfg(feature = "auto_batching")] +unsafe impl Sync for BatchingQueue {} + +#[cfg(feature = "auto_batching")] +impl BatchingQueue { + pub fn new() -> Self { + let engine_counter = Cell::new(0); + let engine_id = 0; + + let mut general_queue_in = Vec::new(); + let mut general_queue_out = Vec::new(); + + // Each engine gets its own general_queue. 
+ // This ensures that once a batch is defined to run on a GPU, + // it will be run on that GPU. + for i in 0..=GPU_NUMBER { + let (sender, receiver) = crossbeam::channel::bounded(MAX_QUEUE); + general_queue_in.push(sender); + general_queue_out.push(receiver); + } + + let atoms_times = Arc::new(Mutex::new(VecDeque::with_capacity(MAX_QUEUE))); + let atoms_map = Arc::new(Mutex::new(HashMap::new())); + + let last_function = Arc::new(Mutex::new(HashMap::new())); + + let idle_since = Cell::new(Instant::now()); + + let tickets = AtomicTickets { + start: AtomicUsize::new(0), + end: AtomicUsize::new(0), + }; + let worker_queue = Arc::new(tickets); + + let promise_buffer = PromiseBuffer::init(MAX_QUEUE); + + return BatchingQueue { + engine_counter, + engine_id, + general_queue_in, + general_queue_out, + atoms_times, + atoms_map, + last_function, + idle_since, + worker_queue, + promise_buffer, + }; + } +} + +#[cfg(feature = "auto_batching")] +impl Clone for BatchingQueue { + fn clone(&self) -> BatchingQueue { + self.engine_counter.set(self.engine_counter.get() + 1); + { + let mut last_function = self.last_function.lock().unwrap(); + last_function.insert(self.engine_counter.get(), u64::MAX); + } + + BatchingQueue { + engine_counter: self.engine_counter.clone(), + engine_id: self.engine_counter.get(), + general_queue_in: self.general_queue_in.clone(), + general_queue_out: self.general_queue_out.clone(), + atoms_times: self.atoms_times.clone(), + atoms_map: self.atoms_map.clone(), + idle_since: Cell::new(Instant::now()), + last_function: self.last_function.clone(), + worker_queue: self.worker_queue.clone(), + promise_buffer: self.promise_buffer.clone(), } - return promise.await; } } diff --git a/dispatcher/src/function_registry.rs b/dispatcher/src/function_registry.rs index a4d8ec4e..d4abccf1 100644 --- a/dispatcher/src/function_registry.rs +++ b/dispatcher/src/function_registry.rs @@ -20,7 +20,7 @@ use std::{ use crate::{ composition::{Composition, CompositionSet}, - 
execution_qs::EngineQueue, + dispatcher::FullQueue }; #[derive(Clone, Debug)] @@ -84,22 +84,26 @@ impl FunctionDict { /// Function to create a future that returns the loaded function async fn load_local( + function_id: FunctionId, static_domain: Arc>, driver: &'static dyn Driver, mut recorder: Recorder, - work_queue: Box, + work_queue: Box, path: String, + #[cfg(feature = "auto_batching")] gpu_id: u8, ) -> DandelionResult> { recorder.record(RecordPoint::ParsingQueue); let function = work_queue - .enqueu_work( + .enqueue_work( machine_interface::function_driver::WorkToDo::ParsingArguments { driver, path, static_domain, recorder: recorder.get_sub_recorder(), }, - ) + function_id, + #[cfg(feature = "auto_batching")] gpu_id, + )? .await? .get_function(); recorder.record(RecordPoint::ParsingDequeue); @@ -110,7 +114,7 @@ pub struct FunctionRegistry { /// List of engines available for each function engine_map: Mutex>>, /// Drivers for the engines to prepare function (get them from available to ready) - pub(crate) drivers: BTreeMap)>, + pub(crate) drivers: BTreeMap)>, /// map with list of all options for each function /// TODO: change structure to avoid copy on get_options options: Mutex>>, @@ -138,7 +142,7 @@ pub struct FunctionRegistry { impl FunctionRegistry { // TODO: make sure that system functions can't be added later for other engines pub fn new( - drivers: BTreeMap)>, + drivers: BTreeMap)>, type_map: &BTreeMap, domains: &BTreeMap>>, ) -> Self { @@ -369,6 +373,7 @@ impl FunctionRegistry { ctx_size: usize, non_caching: bool, mut recorder: Recorder, + #[cfg(feature = "auto_batching")] gpu_id: u8, ) -> DandelionResult<(Context, FunctionConfig)> { // get loader let (driver, load_queue) = match self.drivers.get(&engine_id) { @@ -386,11 +391,13 @@ impl FunctionRegistry { func_future.clone() } else { let func_future = (Box::pin(load_local( + function_id, domain.clone(), *driver, recorder.get_sub_recorder(), load_queue.clone(), path.clone(), + #[cfg(feature = 
"auto_batching")] gpu_id, )) as Pin> + Send>>) .shared(); @@ -406,17 +413,23 @@ impl FunctionRegistry { }; drop(lock_guard); let function = function_future.await?; - let function_config = function.config.clone(); + let mut function_config = function.config.clone(); + if let FunctionConfig::GpuConfig(ref mut config) = function_config { + config.function_id = function_id; + } + recorder.record(RecordPoint::LoadQueue); let context_work_done = load_queue - .enqueu_work( + .enqueue_work( machine_interface::function_driver::WorkToDo::LoadingArguments { function, domain, recorder: recorder.get_sub_recorder(), ctx_size: ctx_size, }, - ) + function_id, + #[cfg(feature = "auto_batching")] gpu_id, + )? .await; recorder.record(RecordPoint::LoadDequeue); let function_context = context_work_done?.get_context(); diff --git a/machine_interface/Cargo.toml b/machine_interface/Cargo.toml index 5df6506f..c4f76ec9 100644 --- a/machine_interface/Cargo.toml +++ b/machine_interface/Cargo.toml @@ -14,13 +14,21 @@ reqwest_io = ["std", "dep:reqwest", "dep:http", "dep:bytes"] bytes_context = ["std", "dep:bytes"] wasm = ["std"] timestamp = ["dandelion_commons/timestamp"] +gpu = ["std"] +cuda = ["gpu"] +hip = ["gpu"] +gpu_process = ["gpu"] +gpu_thread = ["gpu"] test_export = [] +reuse_weights = ["gpu"] +weights_from_disk = ["gpu"] +auto_batching = ["gpu"] [build-dependencies] cmake = "0.1" [dev-dependencies] -criterion = "0.4" +criterion = { version = "0.4", features = ["html_reports"] } # need to lock this to older version, because morello does not yet have rustc 1.6 regex = "1.7.1" test-log = "0.2.16" @@ -31,17 +39,18 @@ dandelion_commons = { path = "../dandelion_commons" } libc = "0.2" nix = "0.26" rand = "0.8" -serde = { version = "1.0", features = ["derive"] } +serde = { version = "1.0", features = ["derive", "rc"] } serde_json = "1.0" core_affinity = { version = "0.8"} futures = { version = "0.3.28" } -tokio = { version = "1", features = ["full"] } +tokio = { version = ">=1.3.7, <2.0.0", 
features = ["full"] } # need at least 1.3.7 for Semaphore.forget_tickets() memcache = "0.18.0" reqwest = { version = "0.12", optional = true } http = { version = "1.1", optional = true } bytes = { version = "1.6", optional = true} libloading = { version = "0.8.1" } log = "0.4.20" +lazy_static = "1.4.0" env_logger = "0.11.5" kvm-bindings = { version = "0.8", optional = true } kvm-ioctls = { version = "0.17", optional = true } @@ -55,10 +64,14 @@ bench = false name = "mmu_worker" required-features = ["mmu"] +[[bin]] +name = "gpu_worker" +required-features = ["gpu_process"] + [[bench]] name = "cheri_benchmark" harness = false [[bench]] name = "mmu_benchmark" -harness = false +harness = false \ No newline at end of file diff --git a/machine_interface/build.rs b/machine_interface/build.rs index 9398922a..0123a6c3 100644 --- a/machine_interface/build.rs +++ b/machine_interface/build.rs @@ -1,21 +1,47 @@ -fn main() { - // check if cheri is enabled and build library if so - #[cfg(feature = "cheri")] - { - use cmake::Config; - // cmake configure and build all - let _all = Config::new("c_machine_libraries") - .define("FORCE_BUILD_CHERI", "") - .build_target("all") - .build(); - // run tests tests - let _test = Config::new("c_machine_libraries") - .build_target("test") - .build(); - // install - let install = Config::new("c_machine_libraries").build(); - // passing cmake information to c - println!("cargo:rustc-link-search=native={}", install.display()); - println!("cargo:rustc-link-lib=static=cheri_lib"); - } -} +use cmake::Config; + +fn cmake_libraries() -> () { + // cmake configure and build all + let _all = Config::new("c_machine_libraries") + .define("FORCE_BUILD_CHERI", "") + .build_target("all") + .build(); + // run tests tests + let _test = Config::new("c_machine_libraries") + .build_target("test") + .build(); + // install + let install = Config::new("c_machine_libraries").build(); + // passing cmake information to c + println!("cargo:rustc-link-search=native={}", 
install.display()); + println!("cargo:rustc-link-lib=static=cheri_lib"); +} + +fn libraries_gpu() { + #[cfg(feature = "hip")] + { + // Link with HIP Runtime 6.2.2 + println!("cargo:rustc-link-search=/opt/rocm-6.2.2/lib"); + println!("cargo:rustc-link-lib=amdhip64"); + } + + #[cfg(feature = "cuda")] + { + // Link with CUDA Device API + println!("cargo:rustc-link-search=/usr/local/cuda/lib64/stubs"); + println!("cargo:rustc-link-lib=cuda"); + + // Link with CUDA Runtime API + println!("cargo:rustc-link-search=/usr/local/cuda/lib64"); + println!("cargo:rustc-link-lib=cudart"); + } +} + +fn main() { + // check if cheri is enabled and build library if so + #[cfg(feature = "cheri")] + cmake_libraries(); + + #[cfg(feature = "gpu")] + libraries_gpu(); +} diff --git a/machine_interface/src/bin/gpu_worker.rs b/machine_interface/src/bin/gpu_worker.rs new file mode 100644 index 00000000..42f90344 --- /dev/null +++ b/machine_interface/src/bin/gpu_worker.rs @@ -0,0 +1,59 @@ +use std::io; + +use std::time::Instant; +use core_affinity::CoreId; +use dandelion_commons::{records::Recorder, DandelionResult}; +use machine_interface::{ + function_driver::{ + compute_driver::gpu::{gpu_utils::SendFunctionArgs, GpuLoop}, + thread_utils::EngineLoop, + ComputeResource, FunctionConfig, + }, + memory_domain::Context, +}; + +fn main() { + // parse args + let args: Vec = std::env::args().collect(); + assert_eq!(args.len(), 4); + let core_id: u8 = args[1].parse().expect("Invalid core ID"); + let gpu_id: u8 = args[2].parse().expect("Invalid GPU ID"); + let worker_count: u8 = args[3].parse().expect("Invalid worker count"); + + // set cpu affinity + assert!(core_affinity::set_for_current(CoreId { + id: core_id as usize + })); + + // setup worker struct + let mut worker = GpuLoop::init(ComputeResource::GPU(core_id, gpu_id, worker_count)) + .expect("Should be able to create worker"); + + // unwrap okay, as all lines are valid Strings + for inp in io::stdin().lines().map(|l| l.unwrap()) { + match 
execute(&mut worker, inp) { + Ok(()) => { + println!("__OK__"); + } + Err(e) => { + println!("__ERROR__ {:?}", e); + } + } + } +} + +fn execute(worker: &mut GpuLoop, inp: String) -> DandelionResult<()> { + let SendFunctionArgs { + config, + context, + output_sets, + } = serde_json::from_str(&inp).expect("Parsing function args failed"); + + let config = FunctionConfig::GpuConfig(config); + let context: Context = context.try_into()?; + + // TODO : this recorder is just a placeholder + // Currently, what's measured by the process is lost, and not reported + let recorder = Recorder::new(0, Instant::now()); + worker.run(config, context, output_sets, recorder).map(|_| ()) +} diff --git a/machine_interface/src/function_driver.rs b/machine_interface/src/function_driver.rs index fe838dca..f67fc73e 100644 --- a/machine_interface/src/function_driver.rs +++ b/machine_interface/src/function_driver.rs @@ -1,20 +1,27 @@ +use std::collections::HashMap; use crate::{ - memory_domain::{Context, MemoryDomain}, + memory_domain::{transfer_memory, Context, MemoryDomain}, + interface::DandelionSystemData, DataRequirementList, }; extern crate alloc; use alloc::sync::Arc; -use dandelion_commons::{records::Recorder, DandelionError, DandelionResult}; +use dandelion_commons::{records::Recorder, DandelionError, DandelionResult, FunctionId}; +use serde::{Deserialize, Serialize}; #[cfg(feature = "wasm")] use libloading::Library; +#[cfg(feature = "gpu")] +use self::compute_driver::gpu::config_parsing::ExecutionBlueprint; + pub mod compute_driver; mod load_utils; pub mod system_driver; + #[cfg(test)] mod test_queue; -mod thread_utils; +pub mod thread_utils; #[derive(Clone)] #[allow(dead_code)] @@ -54,11 +61,22 @@ pub struct WasmConfig { system_data_struct_offset: usize, } +#[derive(Clone, Serialize, Deserialize)] +pub struct GpuConfig { + pub function_id: FunctionId, + pub system_data_struct_offset: usize, + pub code_object_offset: usize, + pub kernels: Arc>>, + #[cfg(feature = "gpu")] + pub 
blueprint: Arc, +} + #[derive(Clone)] pub enum FunctionConfig { ElfConfig(ElfConfig), SysConfig(SystemFunction), WasmConfig(WasmConfig), + GpuConfig(GpuConfig), } pub struct Function { @@ -73,7 +91,7 @@ impl Function { domain: &Box, ctx_size: usize, ) -> DandelionResult { - return match &self.config { + match &self.config { FunctionConfig::ElfConfig(_) => { load_utils::load_static(domain, self.context.clone(), &self.requirements, ctx_size) } @@ -86,14 +104,44 @@ impl Function { context.occupy_space(0, c.sdk_heap_base)?; Ok(context) } - }; + // no need to occupy space or anything like that as long as context is only inputs/outputs + FunctionConfig::GpuConfig(cfg) => { + // TODO : change here. + let mut ctxt = domain.acquire_context(ctx_size)?; + // Make sure sysdata struct isn't overwritten, 0 = system_data_offset + ctxt.occupy_space( + cfg.system_data_struct_offset, + std::mem::size_of::>(), + )?; + // Transfer code object + transfer_memory( + &mut ctxt, + self.context.clone(), + cfg.code_object_offset, + 0, + self.context.size, + )?; + // Mark code object storage as occupied + ctxt.occupy_space(cfg.code_object_offset, self.context.size)?; + Ok(ctxt) + } + } } } #[derive(Debug, Clone, Copy)] pub enum ComputeResource { CPU(u8), - GPU(u8), + GPU(u8, u8, u8), // CPU core, GPU id, number of workers per GPU (only relevant for gpu_process). 
Eventually the CPU and GPU parts should be split +} + +#[cfg(feature = "auto_batching")] +#[derive(Clone, Debug)] +pub struct AtomInputs { + /// items identfied by tuple of key, item index and the context reference + pub item_list: Vec<(u32, usize, Arc)>, + /// the set side inside the contexts the composition set represents + pub set_index: usize, } pub enum WorkToDo { @@ -126,13 +174,33 @@ pub enum WorkToDo { ctx_size: usize, recorder: Recorder, }, + #[cfg(feature = "auto_batching")] + BatchAtom { + function_id: FunctionId, + inputs: Vec>, + recorder: Recorder, + inputs_vec: Option>>, + children_debts: Option>, + gpu_id: Option, + }, Shutdown(), } +#[cfg(feature = "auto_batching")] +pub struct BatchInfo { + pub batch_pos: usize, + pub inputs_vec: Option>>, + pub context_arc: Option>, + pub children_debts: Option>, + pub gpu_id: Option, +} + pub enum WorkDone { Context(Context), Function(Function), Resources(Vec), + #[cfg(feature = "auto_batching")] + SharedContext(BatchInfo), } impl WorkDone { @@ -148,6 +216,13 @@ impl WorkDone { _ => panic!("WorkDone is not function when function was expected"), }; } + #[cfg(feature = "auto_batching")] + pub fn get_shared_context(self) -> BatchInfo { + return match self { + WorkDone::SharedContext(batched_info) => batched_info, + _ => panic!("WorkDone is not SharedContext when SharedContext was expected"), + }; + } } pub trait WorkQueue { @@ -180,7 +255,7 @@ pub trait Driver: Send + Sync { &self, resource: ComputeResource, // TODO check out why this can't be impl instead of Box, + queue: Box, ) -> DandelionResult<()>; // parses an executable, diff --git a/machine_interface/src/function_driver/compute_driver.rs b/machine_interface/src/function_driver/compute_driver.rs index 92aaa340..cd8b7714 100644 --- a/machine_interface/src/function_driver/compute_driver.rs +++ b/machine_interface/src/function_driver/compute_driver.rs @@ -10,5 +10,8 @@ pub mod kvm; #[cfg(feature = "wasm")] pub mod wasm; +#[cfg(feature = "gpu")] +pub mod gpu; + 
#[cfg(test)] mod compute_driver_tests; diff --git a/machine_interface/src/function_driver/compute_driver/compute_driver_tests.rs b/machine_interface/src/function_driver/compute_driver/compute_driver_tests.rs index f5e931ae..acfdf0f3 100644 --- a/machine_interface/src/function_driver/compute_driver/compute_driver_tests.rs +++ b/machine_interface/src/function_driver/compute_driver/compute_driver_tests.rs @@ -1,8 +1,9 @@ #[cfg(all( test, - any(feature = "cheri", feature = "mmu", feature = "kvm", feature = "wasm") + any(feature = "cheri", feature = "mmu", feature = "kvm", feature = "wasm", feature = "gpu") ))] -mod compute_driver_tests { +#[allow(clippy::module_inception)] +pub(crate) mod compute_driver_tests { use crate::{ function_driver::{ test_queue::TestQueue, ComputeResource, Driver, FunctionConfig, WorkToDo, @@ -46,7 +47,7 @@ mod compute_driver_tests { } } - fn prepare_engine_and_function( + pub fn prepare_engine_and_function( filename: &str, dom_init: MemoryResource, driver: &Box, @@ -61,12 +62,12 @@ mod compute_driver_tests { .start_engine(drv_init[0], queue.clone()) .expect("Should be able to start engine"); let function_context = function - .load(&domain, 0x802_0000) + .load(&domain, 0x1_8002_0000) // TODO(GPU) : choose a good value for the context size .expect("Should be able to load function"); return (function_context, function.config, queue); } - fn engine_minimal( + pub fn engine_minimal( filename: &str, dom_init: MemoryResource, driver: Box, @@ -143,7 +144,7 @@ mod compute_driver_tests { assert_eq!(4, read_buffer[1]); } - fn get_expected_mat(size: usize) -> Vec { + pub fn get_expected_mat(size: usize) -> Vec { let mut in_mat_vec = Vec::::new(); for i in 0..(size * size) { in_mat_vec.push(i as i64); @@ -570,7 +571,7 @@ mod compute_driver_tests { .collect())).expect("Should have at least one core"); vec![ ComputeResource::CPU(255), - ComputeResource::GPU(0) + ComputeResource::GPU(0, 0, 0) ]); } @@ -589,7 +590,7 @@ mod compute_driver_tests { 
.collect())).expect("Should have at least one core"); vec![ ComputeResource::CPU(255), - ComputeResource::GPU(0) + ComputeResource::GPU(0, 0, 0) ]); #[cfg(target_arch = "aarch64")] driverTests!(elf_mmu_aarch64; MmuMemoryDomain; MemoryResource::Shared { id: 0, size: (1<<30) }; MmuDriver {}; @@ -634,7 +635,7 @@ mod compute_driver_tests { .collect())).expect("Should have at least one core"); vec![ ComputeResource::CPU(255), - ComputeResource::GPU(0), + ComputeResource::GPU(0, 0, 0), ]); } @@ -654,7 +655,7 @@ mod compute_driver_tests { .collect())).expect("Should have at least one core"); vec![ ComputeResource::CPU(255), - ComputeResource::GPU(0), + ComputeResource::GPU(0, 0, 0), ]); #[cfg(target_arch = "aarch64")] @@ -668,7 +669,7 @@ mod compute_driver_tests { .collect())).expect("Should have at least one core"); vec![ ComputeResource::CPU(255), - ComputeResource::GPU(0), + ComputeResource::GPU(0, 0, 0), ]); } } diff --git a/machine_interface/src/function_driver/compute_driver/gpu.rs b/machine_interface/src/function_driver/compute_driver/gpu.rs new file mode 100644 index 00000000..24ca8e20 --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu.rs @@ -0,0 +1,551 @@ +#[cfg(feature = "gpu_process")] +use self::gpu_utils::start_gpu_process_pool; +use self::{ + buffer_pool::BufferPool, + config_parsing::{Action, Argument, RuntimeGpuConfig, SYSDATA_OFFSET}, + gpu_utils::{copy_data_to_device, get_data_length, get_size, write_gpu_outputs}, +}; +use crate::{ + function_driver::{ + thread_utils::{run_thread, EngineLoop}, + ComputeResource, Driver, Function, FunctionConfig, GpuConfig, WorkQueue, + }, + interface::DandelionSystemData, + memory_domain::{Context, ContextTrait, ContextType, MemoryDomain}, + DataRequirementList, +}; +use core_affinity::CoreId; +use dandelion_commons::{ + records::{RecordPoint, Recorder}, + DandelionError, DandelionResult, +}; +use libc::c_void; +use std::{ + borrow::Borrow, + collections::HashMap, + ptr::null, + sync::{ + 
mpsc::{self, Receiver, Sender}, + Arc, Mutex, + }, + thread::{self, spawn}, +}; + +pub(crate) mod buffer_pool; +pub(crate) mod config_parsing; +mod gpu_api; +pub mod gpu_utils; + +#[cfg(test)] +mod gpu_tests; + +fn execute( + actions: &Vec, + buffer_pool: &BufferPool, + context: &Context, + config: &RuntimeGpuConfig, + #[cfg(feature = "auto_batching")] batch_size: usize, +) -> DandelionResult<()> { + for action in actions { + match action { + Action::ExecKernel(name, args, launch_config) => { + // Explanation: + // HIP expects arguments as an array of void pointers (pointers to the arguments). + // BufferPool.get() returns a stack allocated struct, so we need to allocate it outside of the loop, + // otherwise the pointer becomes invalid. In our case we allocate it on the heap using Box. If we + // just used a Vec instead, pointers to its element would become invalid when the Vec + // resizes. Additionally, Box provides the convenient into_raw function to make sure the data lives + // long enough, although this means we must manually deallocate at the end. + + let mut params: Vec<*const c_void> = Vec::with_capacity(args.len()); + let mut dev_ptrs = Vec::with_capacity(args.len()); + for arg in args { + match arg { + Argument::Ptr(id) => { + let dev_ptr = buffer_pool.get_pointer(id)?; + dev_ptrs.push(Box::into_raw(Box::new(dev_ptr))); + params.push(*dev_ptrs.last().unwrap() as *const c_void); + } + Argument::Sizeof(id) => { + params.push(&buffer_pool.get_size(id) as *const _ as *const c_void); + } + Argument::Constant(constant) => { + params.push(constant as *const _ as *const c_void); + } + }; + } + + gpu_api::module_launch_kernel( + config + .kernels + .get(name) + .ok_or(DandelionError::UndeclaredIdentifier(name.to_owned()))?, + get_size( + &launch_config.grid_dim_x, + buffer_pool, + context, + #[cfg(feature = "auto_batching")] + batch_size, + )? 
as u32, + get_size( + &launch_config.grid_dim_y, + buffer_pool, + context, + #[cfg(feature = "auto_batching")] + batch_size, + )? as u32, + get_size( + &launch_config.grid_dim_z, + buffer_pool, + context, + #[cfg(feature = "auto_batching")] + batch_size, + )? as u32, + get_size( + &launch_config.block_dim_x, + buffer_pool, + context, + #[cfg(feature = "auto_batching")] + batch_size, + )? as u32, + get_size( + &launch_config.block_dim_y, + buffer_pool, + context, + #[cfg(feature = "auto_batching")] + batch_size, + )? as u32, + get_size( + &launch_config.block_dim_z, + buffer_pool, + context, + #[cfg(feature = "auto_batching")] + batch_size, + )? as u32, + get_size( + &launch_config.shared_mem_bytes, + buffer_pool, + context, + #[cfg(feature = "auto_batching")] + batch_size, + )? as u32, + gpu_api::DEFAULT_STREAM, + params.as_ptr(), + null(), + )?; + + // Manually deallocate heap memory we performed into_raw on + for ptr in dev_ptrs { + unsafe { + let allocation = Box::from_raw(ptr); + drop(allocation); // Not necessary, just do it's very explicit we're dropping the data here + } + } + } + Action::Repeat(times, actions) => { + let repetitions = get_size( + times, + buffer_pool, + context, + #[cfg(feature = "auto_batching")] + batch_size, + )?; + for _ in 0..repetitions { + execute( + actions, + buffer_pool, + context, + config, + #[cfg(feature = "auto_batching")] + batch_size, + )?; + } + } + } + } + + #[cfg(feature = "timestamp")] + let _ = gpu_api::synchronize(); + + Ok(()) +} + +pub fn gpu_run( + cpu_slot: usize, + gpu_id: u8, + config: GpuConfig, + buffer_pool: Arc>, + mut context: Context, + output_sets: Arc>, + mut recorder: Recorder, +) -> DandelionResult { + // Set affinity of worker thread + if !core_affinity::set_for_current(CoreId { id: cpu_slot }) { + return Err(DandelionError::EngineResourceError); + } + + gpu_api::set_device(gpu_id)?; + + let (read_only_data, inputs_data) = match context.context { + ContextType::Gpu(ref gpu_context) => 
(&gpu_context.read_only, &gpu_context.inputs), + _ => return Err(DandelionError::ContextMissmatch), + }; + #[cfg(feature = "auto_batching")] + let batch_size = match context.context { + ContextType::Gpu(ref gpu_context) => gpu_context.batch_size, + _ => return Err(DandelionError::ContextMissmatch), + }; + + // Load modules and kernels + let mut loaded_modules_map: HashMap = HashMap::new(); + let mut loaded_modules: Vec = Vec::new(); + let mut loaded_kernels: HashMap = HashMap::new(); + for kernel in config.kernels.iter() { + let module_name = kernel["module_name"].clone(); + let kernel_name = &kernel["kernel_name"]; + + if !loaded_modules_map.contains_key(&module_name) { + let sub_read_only = read_only_data.get(&module_name).unwrap(); + let data_pointer = sub_read_only + .context + .get_chunk_ref(sub_read_only.position.offset, sub_read_only.position.size) + .unwrap() + .as_ptr() as *const c_void; + + let loaded_module = gpu_api::module_load_data(data_pointer)?; + loaded_modules_map.insert(module_name.clone(), loaded_modules.len()); + loaded_modules.push(loaded_module); + } + + let module = &loaded_modules[loaded_modules_map[&module_name]]; + let loaded_kernel = gpu_api::module_get_function(&module, kernel_name)?; + let _ = loaded_kernels + .insert(kernel_name.to_string(), loaded_kernel) + .ok_or(DandelionError::UnknownSymbol); + } + + let function_id = config.function_id; + let mut buffer_pool = buffer_pool.lock().unwrap(); + + let config = RuntimeGpuConfig { + system_data_struct_offset: 0, + modules: Arc::new(loaded_modules), + kernels: Arc::new(loaded_kernels), + blueprint: config.blueprint, + }; + + recorder.record(RecordPoint::GPUTransferStart); + let mut reload_weights = true; + #[cfg(feature = "reuse_weights")] + { + reload_weights = buffer_pool.prev_function_id != function_id; + } + recorder.set_gpu_info(!reload_weights, gpu_id); + + if reload_weights { + buffer_pool.prev_function_id = function_id; + buffer_pool.dealloc_all()?; + + for name in 
&config.blueprint.weights { + let sub_read_only = read_only_data.get(name.as_str()).unwrap(); + let data_pointer = sub_read_only + .context + .get_chunk_ref(sub_read_only.position.offset, sub_read_only.position.size) + .unwrap() + .as_ptr() as *const c_void; + let size = sub_read_only.position.size; + + let _ = buffer_pool.alloc_buffer(name, size, true)?; + let dev_ptr = buffer_pool.get_pointer(name)?; + + gpu_api::memcpy_h_to_d(&dev_ptr, 0, data_pointer, size)?; + } + } + + #[cfg(not(feature = "auto_batching"))] + for name in &config.blueprint.inputs { + let input = inputs_data.get(name.as_str()).unwrap(); + let size = input.position.size; + + let _ = buffer_pool.alloc_buffer(name, size, false)?; + let dev_ptr = buffer_pool.get_pointer(name)?; + + let mut data_read = 0; + while data_read < size { + let data = input + .context + .get_chunk_ref( + input.position.offset + data_read, + input.position.size - data_read, + ) + .unwrap(); + let size_read = data.len(); + let data_pointer = data.as_ptr() as *const c_void; + + gpu_api::memcpy_h_to_d( + &dev_ptr, + data_read.try_into().unwrap(), + data_pointer, + size_read, + )?; + + data_read += size_read + 1; + } + } + #[cfg(feature = "auto_batching")] + for name in &config.blueprint.inputs { + let name0 = format!("{}0", name); + let input0 = inputs_data.get(name0.as_str()).unwrap(); + let size_single = input0.position.size; + let size_batch = size_single * batch_size; + + let _ = buffer_pool.alloc_buffer(name, size_batch, false)?; + let dev_ptr = buffer_pool.get_pointer(name)?; + + let mut input_offset = 0; + for i in 0..batch_size { + let name_idx = format!("{}{}", name.clone(), i); + let input = inputs_data.get(name_idx.as_str()).unwrap(); + + let mut data_read = 0; + while data_read < size_single { + let data = input + .context + .get_chunk_ref( + input.position.offset + data_read, + input.position.size - data_read, + ) + .unwrap(); + let size_read = data.len(); + let data_pointer = data.as_ptr() as *const c_void; + + 
gpu_api::memcpy_h_to_d( + &dev_ptr, + (input_offset + data_read).try_into().unwrap(), + data_pointer, + size_read, + )?; + + data_read += size_read + 1; + } + input_offset += size_single; + } + } + + for (name, sizing) in &config.blueprint.buffers { + let size = get_size( + sizing, + &buffer_pool, + &context, + #[cfg(feature = "auto_batching")] + batch_size, + )? as usize; + let _ = buffer_pool.alloc_buffer(name, size, false)?; + } + recorder.record(RecordPoint::GPUTransferEnd); + + recorder.record(RecordPoint::GPUInferenceStart); + execute( + &config.blueprint.control_flow, + buffer_pool.borrow(), + &context, + &config, + #[cfg(feature = "auto_batching")] + batch_size, + )?; + recorder.record(RecordPoint::GPUInferenceEnd); + + recorder.record(RecordPoint::GPUOutputStart); + // Copy results back into host memory from device memory + write_gpu_outputs( + &mut context, + &output_sets, + buffer_pool.borrow(), + #[cfg(feature = "auto_batching")] + batch_size, + )?; + recorder.record(RecordPoint::GPUOutputEnd); + + // Zero out input, temporary buffers, and output buffers + buffer_pool.dealloc_tmp_buffers()?; + + Ok(context) +} + +pub struct GpuLoop { + cpu_slot: usize, + gpu_id: u8, + buffers: Arc>, + sender: Sender>, + receiver: Receiver>, +} + +#[allow(non_upper_case_globals)] +const Gi: usize = 1 << 30; + +// TODO: add adaptive amount if other GPUs are used: +// MI210 - 64GiB => 60 * Gi +// RTX 3090 - 24GiB => 23 * Gi +const VRAM_SIZE: usize = 23 * Gi; + +impl EngineLoop for GpuLoop { + fn init(resource: ComputeResource) -> DandelionResult> { + let ComputeResource::GPU(cpu_slot, gpu_id, worker_count) = resource else { + return Err(DandelionError::EngineResourceError); + }; + + let (sender, receiver) = mpsc::channel(); + + Ok(Box::new(Self { + cpu_slot: cpu_slot as usize, + gpu_id, + buffers: Arc::new(Mutex::new(BufferPool::try_new( + gpu_id, + VRAM_SIZE / worker_count as usize, + )?)), + sender, + receiver, + })) + } + + fn run( + &mut self, + config: FunctionConfig, 
+ context: Context, + output_sets: Arc>, + recorder: Recorder, + ) -> DandelionResult { + let FunctionConfig::GpuConfig(config) = config else { + return Err(DandelionError::ConfigMissmatch); + }; + let subrecorder = recorder.get_sub_recorder(); + + // Clone for thread + let buffer_pool = self.buffers.clone(); + let sender = self.sender.clone(); + let cpu_slot = self.cpu_slot; + let gpu_id = self.gpu_id; + thread::spawn(move || { + let result = gpu_run( + cpu_slot, + gpu_id, + config, + buffer_pool, + context, + output_sets, + subrecorder, + ); + sender.send(result).unwrap(); + }); + + // TODO: add proper error handling mechanisms + // Use an mpsc to receive results. If a fault occured, the handler could be registered to put an error on the channel, + // while the work thread wouldn't return. This means it would have to be shot down + let context = self + .receiver + .recv() + .map_err(|_| DandelionError::EngineError) + .and_then(|inner| inner)?; + + Ok(context) + } +} + +// Function parsing logic that can be shared between gpu_thread and gpu_process variants +fn common_parse( + function_path: String, + static_domain: &Box, +) -> DandelionResult { + let requirements = DataRequirementList { + static_requirements: vec![], + input_requirements: vec![], + }; + + let context = Box::new(static_domain.acquire_context(0)?); + + let mut gpu_config = config_parsing::parse_config(&function_path)?; + gpu_config.code_object_offset = + SYSDATA_OFFSET + std::mem::size_of::>(); + let config = FunctionConfig::GpuConfig(gpu_config); + + Ok(Function { + requirements, + context: Arc::from(context), + config, + }) +} + +// Engine start-up logic that can be shared between gpu_thread and gpu_process variants +fn common_start(resource: ComputeResource) -> DandelionResult<(u8, u8, u8)> { + // extract resources + let (cpu_slot, gpu_id, worker_count) = match resource { + ComputeResource::GPU(cpu, gpu, worker_count) => (cpu, gpu, worker_count), + _ => return 
Err(DandelionError::EngineResourceError), + }; + // check that core is available + let available_cores = match core_affinity::get_core_ids() { + None => return Err(DandelionError::EngineError), + Some(cores) => cores, + }; + if !available_cores + .iter() + .any(|x| x.id == usize::from(cpu_slot)) + { + return Err(DandelionError::EngineResourceError); + } + // check gpu is available + // gpu_api::limit_heap_size(0)?; + if usize::from(gpu_id) >= gpu_api::get_device_count()? { + return Err(DandelionError::EngineResourceError); + } + + Ok((cpu_slot, gpu_id, worker_count)) +} + +pub struct GpuThreadDriver {} + +impl Driver for GpuThreadDriver { + fn start_engine( + &self, + resource: ComputeResource, + queue: Box, + ) -> dandelion_commons::DandelionResult<()> { + let (cpu_slot, gpu_id, _) = common_start(resource)?; + + // Pass worker_count as 1 to make sure gpu_thread takes full memory region + spawn(move || run_thread::(ComputeResource::GPU(cpu_slot, gpu_id, 1), queue)); + Ok(()) + } + + fn parse_function( + &self, + function_path: String, + static_domain: &Box, + ) -> DandelionResult { + common_parse(function_path, static_domain) + } +} + +pub struct GpuProcessDriver {} + +impl Driver for GpuProcessDriver { + fn start_engine( + &self, + resource: ComputeResource, + queue: Box, + ) -> dandelion_commons::DandelionResult<()> { + let (cpu_slot, gpu_id, worker_count) = common_start(resource)?; + + #[cfg(feature = "gpu_process")] + start_gpu_process_pool(cpu_slot, gpu_id, worker_count, queue); + Ok(()) + } + + fn parse_function( + &self, + function_path: String, + static_domain: &Box, + ) -> DandelionResult { + common_parse(function_path, static_domain) + } +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/buffer_pool.rs b/machine_interface/src/function_driver/compute_driver/gpu/buffer_pool.rs new file mode 100644 index 00000000..dda547ec --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/buffer_pool.rs @@ -0,0 +1,116 @@ +use 
super::gpu_api::{self, DeviceAllocation, DevicePointer}; +use dandelion_commons::{DandelionError, DandelionResult, FunctionId}; +use log::debug; +use std::collections::HashMap; + +#[derive(Debug)] +struct Buffer { + offset: usize, + size: usize, +} + +#[derive(Debug)] +pub struct BufferPool { + // (ptr, ptr in use) + allocation: DeviceAllocation, + buffers: HashMap, + pub prev_function_id: FunctionId, + last_offset_global: usize, + last_offset_weights: usize, + tmp_names: Vec, +} + +impl BufferPool { + pub fn try_new(gpu_id: u8, region_size: usize) -> DandelionResult { + gpu_api::set_device(gpu_id)?; + + let mut allocation = gpu_api::DeviceAllocation::try_new(region_size)?; + allocation.zero_out()?; + let buffers = HashMap::new(); + let last_offset_global = 0usize; + let last_offset_weights = 0usize; + let tmp_names = Vec::new(); + Ok(Self { + allocation, + buffers, + prev_function_id: u64::MAX, + last_offset_global, + last_offset_weights, + tmp_names, + }) + } + + pub fn alloc_buffer( + &mut self, + name: &str, + size: usize, + is_weight: bool, + ) -> DandelionResult<()> { + let dev_ptr = self.last_offset_global; + + macro_rules! align { + ($e: expr) => { + ($e + 255) / 256 * 256 + }; + } + // Round size to 256 bytes, which is the minimum that GPU allocators typically use. 
This might need to be changed + let aligned_size = align!(size); + + if dev_ptr + aligned_size > self.allocation.size { + debug!( + "Going to throw OutOfMemory, offset: {}, aligned_size: {}, self_alloc_size: {}", + dev_ptr, aligned_size, self.allocation.size + ); + return Err(DandelionError::OutOfMemory); + } + + self.last_offset_global = align!(self.last_offset_global + aligned_size); + if is_weight { + self.last_offset_weights = self.last_offset_global; + } else { + self.tmp_names.push(name.to_string()); + } + + self.buffers.insert( + name.to_string(), + Buffer { + offset: dev_ptr, + size: size, + }, + ); + + Ok(()) + } + + pub fn get_pointer(&self, name: &str) -> DandelionResult { + let buffer = self.buffers.get(name).ok_or(DandelionError::EngineError)?; + Ok(DevicePointer { + ptr: unsafe { self.allocation.ptr.byte_add(buffer.offset) }, + }) + } + + pub fn get_size(&self, name: &str) -> DandelionResult { + let buffer = self.buffers.get(name).ok_or(DandelionError::EngineError)?; + Ok(buffer.size) + } + + pub fn dealloc_tmp_buffers(&mut self) -> DandelionResult<()> { + self.allocation + .zero_from_to(self.last_offset_weights, self.last_offset_global)?; + for name in &self.tmp_names { + self.buffers.remove(name); + } + self.last_offset_global = self.last_offset_weights; + self.tmp_names.clear(); + Ok(()) + } + + pub fn dealloc_all(&mut self) -> DandelionResult<()> { + self.allocation.zero_size(self.last_offset_global)?; + self.buffers.clear(); + self.last_offset_global = 0; + self.last_offset_weights = 0; + self.tmp_names.clear(); + Ok(()) + } +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/config_parsing.rs b/machine_interface/src/function_driver/compute_driver/gpu/config_parsing.rs new file mode 100644 index 00000000..4a5581e8 --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/config_parsing.rs @@ -0,0 +1,89 @@ +use super::gpu_api::{Function, Module}; +use crate::function_driver::GpuConfig; +use 
dandelion_commons::{DandelionError, DandelionResult}; +use serde::{Deserialize, Serialize}; +use std::{collections::HashMap, fs::File, io::BufReader, sync::Arc}; + +pub const SYSDATA_OFFSET: usize = 0usize; + +#[derive(Deserialize, Serialize, Debug)] +pub enum Sizing { + Sizeof(String), + Absolute(u64), + #[cfg(feature = "auto_batching")] + AbsoluteByBatch(u64), + #[cfg(feature = "auto_batching")] + AbsoluteByBatchEven(u64), + FromInput { + bufname: String, + idx: usize, + }, +} + +#[derive(Deserialize, Serialize, Debug)] +pub enum Argument { + Ptr(String), + Sizeof(String), + Constant(i64), +} + +#[derive(Deserialize, Serialize, Debug)] +pub struct LaunchConfig { + pub grid_dim_x: Sizing, + pub grid_dim_y: Sizing, + pub grid_dim_z: Sizing, + pub block_dim_x: Sizing, + pub block_dim_y: Sizing, + pub block_dim_z: Sizing, + pub shared_mem_bytes: Sizing, +} + +#[derive(Deserialize, Serialize, Debug)] +pub enum Action { + ExecKernel(String, Vec, Box), + Repeat(Sizing, Vec), +} + +#[derive(Deserialize, Serialize, Debug)] +pub struct ExecutionBlueprint { + pub inputs: Vec, + pub weights: Vec, + pub buffers: HashMap, + pub outputs: Vec, + pub control_flow: Vec, +} + +#[derive(Deserialize, Serialize, Debug)] +struct GpuConfigIR { + modules: Vec>, + kernels: Vec>, + blueprint: ExecutionBlueprint, +} + +impl From for GpuConfig { + fn from(value: GpuConfigIR) -> Self { + Self { + function_id: u64::MAX, + system_data_struct_offset: SYSDATA_OFFSET, + code_object_offset: 0, + kernels: Arc::new(value.kernels), + blueprint: Arc::new(value.blueprint), + } + } +} + +#[derive(Clone)] +pub struct RuntimeGpuConfig { + pub system_data_struct_offset: usize, + pub modules: Arc>, + pub kernels: Arc>, + pub blueprint: Arc, +} + +pub fn parse_config(path: &str) -> DandelionResult { + let file = File::open(path).map_err(|_| DandelionError::FileError)?; + let reader = BufReader::new(file); + let ir: GpuConfigIR = serde_json::from_reader(reader) + .map_err(|e| 
DandelionError::ParsingJSONError(format!("{e}")))?; + Ok(ir.into()) +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/cuda.rs b/machine_interface/src/function_driver/compute_driver/gpu/cuda.rs new file mode 100644 index 00000000..df486f97 --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/cuda.rs @@ -0,0 +1,365 @@ +//! Rust bindings for a subset of the CUDA Device API + +use dandelion_commons::{DandelionError, DandelionResult}; +use libc::{c_void, size_t}; +use std::{ + ffi::{CStr, CString}, + ptr::null, +}; + +type ErrorT = u32; +type CUresult = u32; +type CUdevice = *const i32; +type CUcontext = *const c_void; + +type CUmodule = *const c_void; +pub struct Module(CUmodule); + +unsafe impl Send for Module {} +unsafe impl Sync for Module {} + +type CUfunction = *const c_void; +pub struct Function(CUfunction); + +unsafe impl Send for Function {} +unsafe impl Sync for Function {} + +// typedef struct iHipStream_t* hipStream_t +pub type StreamT = *const c_void; +pub const DEFAULT_STREAM: StreamT = null(); + +// has to be pub to allow address-getting when preparing args +#[derive(Debug)] +pub struct DeviceAllocation { + pub ptr: *const c_void, + pub size: usize, + device: u8, +} + +unsafe impl Send for DeviceAllocation {} +unsafe impl Sync for DeviceAllocation {} + +// Should be associated with a DeviceAllocation; maybe use lifetimes for this in the future +#[repr(C)] // We take a raw pointers, so make sure the layout is as expected +pub struct DevicePointer { + pub ptr: *const c_void, +} + +#[link(name = "cudart")] +unsafe extern "C" { + fn cudaGetErrorString(error: ErrorT) -> *const i8; + fn cudaGetDevice(device_: *const i32) -> ErrorT; + fn cudaGetDeviceCount(count: *const i32) -> ErrorT; + fn cudaSetDevice(device: *const i32) -> ErrorT; + fn cudaDeviceSetLimit(limit: u32, value: size_t) -> ErrorT; + + fn cudaMalloc(ptr: *mut *const c_void, size: size_t) -> ErrorT; + fn cudaMemset(dst: *const c_void, value: i32, count: 
size_t) -> ErrorT; + fn cudaFree(ptr: *const c_void) -> ErrorT; + fn cudaMemcpy(dst: *const c_void, src: *const c_void, count: size_t, kind: u8) -> ErrorT; + + fn cudaDeviceSynchronize() -> ErrorT; +} + +#[link(name = "cuda")] +unsafe extern "C" { + fn cuGetErrorString(error: CUresult, pStr: *mut *const i8) -> CUresult; + + fn cuInit(flags: u32) -> CUresult; + fn cuDeviceGet(device: *mut CUdevice, ordinal: i32) -> CUresult; + fn cuCtxCreate(pctx: *mut CUcontext, flags: u32, dev: CUdevice) -> CUresult; + fn cuDevicePrimaryCtxRetain(pctx: *mut CUcontext, dev: CUdevice) -> CUresult; + fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult; + fn cuCtxDestroy(pctx: CUcontext) -> CUresult; + fn cuCtxSynchronize() -> CUresult; + + fn cuModuleLoad(module: *mut CUmodule, fname: *const i8) -> CUresult; + fn cuModuleLoadData(module: *mut CUmodule, image: *const c_void) -> CUresult; + fn cuModuleUnload(module: CUmodule) -> CUresult; + fn cuModuleGetFunction( + function: *mut CUfunction, + module: CUmodule, + kname: *const i8, + ) -> CUresult; + fn cuLaunchKernel( + function: CUfunction, + grid_dim_x: u32, + grid_dim_y: u32, + grid_dim_z: u32, + block_dim_x: u32, + block_dim_y: u32, + block_dim_z: u32, + shared_mem_bytes: u32, + stream: StreamT, + kernel_params: *const *const c_void, + extra: *const *const c_void, + ) -> CUresult; + + // fn cuMemAlloc(ptr: *mut *const c_void, size: size_t) -> CUresult; + // fn cuMemsetD8(dst: *const c_void, value: i8, sizeBytes: size_t) -> CUresult; + // fn cuMemFree(ptr: *const c_void) -> CUresult; + // fn cuMemcpyHtoD(dst: *const c_void, src: *const c_void, sizeBytes: size_t) -> CUresult; + // fn cuMemcpyDtoH(dst: *const c_void, src: *const c_void, sizeBytes: size_t) -> CUresult; +} + +fn get_cu_error_string(error_code: CUresult) -> String { + let mut error_str_ptr: *const i8 = null(); + unsafe { + let _ = cuGetErrorString(error_code, &mut error_str_ptr); + CStr::from_ptr(error_str_ptr) + .to_str() + .expect("Invalid CUDA error string (shouldn't 
happen)") + .to_string() + } +} + +fn get_cuda_error_string(error_code: ErrorT) -> String { + unsafe { + CStr::from_ptr(cudaGetErrorString(error_code) as *mut i8) + .to_str() + .expect("Invalid CUDA error string (shouldn't happen)") + .to_string() + } +} + +macro_rules! cu_checked_call { + ($fcall: expr) => { + unsafe { + let error = $fcall; + if error != 0 { + return Err(DandelionError::CudaError(get_cu_error_string(error))); + } + } + }; +} + +macro_rules! cuda_checked_call { + ($fcall: expr) => { + unsafe { + let error = $fcall; + if error != 0 { + return Err(DandelionError::CudaError(get_cuda_error_string(error))); + } + } + }; +} + +pub fn initialize() -> DandelionResult<()> { + cu_checked_call!(cuInit(0)); + Ok(()) +} + +pub fn get_device() -> DandelionResult { + let mut ret: i32 = 0; + cuda_checked_call!(cudaGetDevice(&mut ret as *const i32)); + + ret.try_into() + .map_err(|_| DandelionError::EngineResourceError) +} + +pub fn get_device_count() -> DandelionResult { + let mut ret: i32 = -1; + cuda_checked_call!(cudaGetDeviceCount(&mut ret as *const i32)); + + ret.try_into() + .map_err(|_| DandelionError::EngineResourceError) +} + +pub fn set_device(gpu_id: u8) -> DandelionResult<()> { + cuda_checked_call!(cudaSetDevice(gpu_id as *const i32)); + + let mut device: CUdevice = 0 as CUdevice; + cu_checked_call!(cuDeviceGet(&mut device, gpu_id as i32)); + + let mut primary_ctx: CUcontext = null(); + cu_checked_call!(cuDevicePrimaryCtxRetain(&mut primary_ctx, device)); + cu_checked_call!(cuCtxSetCurrent(primary_ctx)); + + Ok(()) +} + +pub fn create_context(device: CUdevice) -> DandelionResult { + let mut context: CUcontext = null(); + // TODO: look at the context flags + // 0 means automatic scheduling + // https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf + cu_checked_call!(cuCtxCreate(&mut context, 0, device)); + + context + .try_into() + .map_err(|_| DandelionError::EngineResourceError) +} + 
+pub fn finalize(context: CUcontext) -> DandelionResult<()> { + cu_checked_call!(cuCtxDestroy(context)); + Ok(()) +} + +pub fn device_synchronize() -> DandelionResult<()> { + cu_checked_call!(cuCtxSynchronize()); + Ok(()) +} + +pub fn limit_heap_size(size: usize) -> DandelionResult<()> { + // cudaLimitMallocHeapSize = 2 + cuda_checked_call!(cudaDeviceSetLimit(2, size)); + Ok(()) +} + +pub fn module_load(path: &str) -> DandelionResult { + let mut ret: CUmodule = null(); + let fname = + CString::new(path).or(Err(DandelionError::CudaError("Invalid Module Path".into())))?; + cu_checked_call!(cuModuleLoad(&mut ret, fname.as_ptr())); + Ok(Module(ret)) +} + +/// # Safety +/// Requires *image* to point to a valid hsaco code object +pub fn module_load_data(image: *const c_void) -> DandelionResult { + let mut ret: CUmodule = null(); + cu_checked_call!(cuModuleLoadData(&mut ret as *mut CUmodule, image)); + Ok(Module(ret)) +} + +pub fn module_get_function(module: &Module, name: &str) -> DandelionResult { + let mut ret: CUfunction = null(); + let kname = CString::new(name).or(Err(DandelionError::CudaError("Invalid Name".into())))?; + cu_checked_call!(cuModuleGetFunction( + &mut ret as *mut CUfunction, + module.0, + kname.as_ptr() + )); + Ok(Function(ret)) +} + +/// # Safety +/// Requires *kernel_params* to point to an array of valid pointers to kernel arguments +#[allow(clippy::too_many_arguments)] +pub fn module_launch_kernel( + function: &Function, + grid_dim_x: u32, + grid_dim_y: u32, + grid_dim_z: u32, + block_dim_x: u32, + block_dim_y: u32, + block_dim_z: u32, + shared_mem_bytes: u32, + stream: StreamT, + kernel_params: *const *const c_void, + extra: *const *const c_void, +) -> DandelionResult<()> { + cu_checked_call!(cuLaunchKernel( + function.0, + grid_dim_x, + grid_dim_y, + grid_dim_z, + block_dim_x, + block_dim_y, + block_dim_z, + shared_mem_bytes, + stream, + kernel_params, + extra, + )); + Ok(()) +} + +impl Drop for Module { + fn drop(&mut self) { + unsafe { + let 
error_code = cuModuleUnload(self.0); + if error_code != 0 { + panic!( + "Unloading module failed: {} - {}", + error_code, + get_cu_error_string(error_code) + ); + } + } + } +} + +pub fn gpu_malloc(ptr: *mut *const c_void, size: size_t) -> DandelionResult<()> { + cuda_checked_call!(cudaMalloc(ptr, size)); + Ok(()) +} + +pub fn gpu_zero_mem(ptr: *const c_void, size: size_t) -> DandelionResult<()> { + cuda_checked_call!(cudaMemset(ptr, 0, size)); + Ok(()) +} + +pub fn gpu_free(ptr: *const c_void) -> DandelionResult<()> { + cuda_checked_call!(cudaFree(ptr)); + Ok(()) +} + +impl DeviceAllocation { + pub fn try_new(size: usize) -> DandelionResult { + let mut ret: *const c_void = null(); + let _ = gpu_malloc(&mut ret, size); + // zero out memory + let _ = gpu_zero_mem(ret, size); + + let device = get_device()?; + Ok(Self { + ptr: ret as *const c_void, + size, + device, + }) + } + + pub fn zero_out(&mut self) -> DandelionResult<()> { + gpu_zero_mem(self.ptr, self.size) + } + + pub fn zero_size(&mut self, size: usize) -> DandelionResult<()> { + gpu_zero_mem(self.ptr, size) + } + + pub fn zero_from_to(&mut self, from: usize, to: usize) -> DandelionResult<()> { + let size = to - from; + gpu_zero_mem(self.ptr.wrapping_add(from), size) + } +} + +impl Drop for DeviceAllocation { + fn drop(&mut self) { + // Not entirely sure if this is required but device allocations are freed off the hot path anyway + let curr_dev = get_device().expect("Need to be able to get current device before freeing"); + set_device(self.device).expect("Need to be able to set device before freeing"); + let _ = gpu_free(self.ptr); + set_device(curr_dev).expect("Need to be able to restore device after freeing"); + } +} + +pub fn memcpy_h_to_d( + dst: &DevicePointer, + dev_offset: isize, + src: *const c_void, + size_bytes: usize, +) -> DandelionResult<()> { + cuda_checked_call!(cudaMemcpy( + dst.ptr.byte_offset(dev_offset), + src, + size_bytes, + 1 + )); + Ok(()) +} + +pub fn memcpy_d_to_h( + dst: *const 
c_void, + src: &DevicePointer, + size_bytes: usize, +) -> DandelionResult<()> { + cuda_checked_call!(cudaMemcpy(dst, src.ptr, size_bytes, 2)); + Ok(()) +} + +pub fn synchronize() -> DandelionResult<()> { + // TODO : make synchronization more fine-grained, e.g. on stream level + cuda_checked_call!(cudaDeviceSynchronize()); + Ok(()) +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_api.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_api.rs new file mode 100644 index 00000000..ffabec21 --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_api.rs @@ -0,0 +1,12 @@ +#[cfg(all(feature = "cuda", feature = "hip"))] +compile_error!("Cannot compile with both the cuda and hip features"); + +#[cfg_attr(feature = "cuda", path = "cuda.rs")] +#[cfg_attr(feature = "hip", path = "hip.rs")] +pub mod gpu_api; + +pub use gpu_api::{ + get_device_count, limit_heap_size, memcpy_d_to_h, memcpy_h_to_d, module_get_function, + module_launch_kernel, module_load_data, set_device, synchronize, DeviceAllocation, + DevicePointer, Function, Module, DEFAULT_STREAM, +}; diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests.rs new file mode 100644 index 00000000..00364f8b --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests.rs @@ -0,0 +1,34 @@ +#[cfg(feature = "hip")] +mod hip_tests; + +#[cfg(feature = "cuda")] +mod cuda_tests; + +mod load_utils; +mod tests_utils; + +use crate::function_driver::{ + compute_driver::gpu::{GpuProcessDriver, GpuThreadDriver}, + Driver, +}; +use dandelion_commons::records::{Archive, ArchiveInit, RecordPoint}; +use std::sync::{Arc, Mutex}; + +// To force tests to run sequentially as we might otherwise run out of GPU memory +lazy_static::lazy_static! 
{ + static ref GPU_LOCK: Mutex<()> = Mutex::new(()); +} + +fn get_driver() -> Box { + #[cfg(all(feature = "gpu_process", feature = "gpu_thread"))] + panic!("gpu_process and gpu_thread enabled simultaneously"); + + #[cfg(not(any(feature = "gpu_process", feature = "gpu_thread")))] + panic!("Neither gpu_process nor gpu_thread enabled"); + + #[cfg(feature = "gpu_process")] + return Box::new(GpuProcessDriver {}); + + #[cfg(feature = "gpu_thread")] + return Box::new(GpuThreadDriver {}); +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests.rs new file mode 100644 index 00000000..f2962add --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests.rs @@ -0,0 +1,137 @@ +mod compiled_tests; +mod load_models; + +use crate::{ + function_driver::{ + compute_driver::{ + compute_driver_tests::compute_driver_tests::{ + engine_minimal, get_expected_mat, prepare_engine_and_function, + }, + gpu::gpu_tests::{get_driver, Arc, Archive, ArchiveInit, RecordPoint, GPU_LOCK}, + }, + ComputeResource, Driver, WorkToDo, + }, + memory_domain::{gpu::GpuMemoryDomain, ContextTrait, MemoryResource}, + DataItem, DataSet, Position, +}; + +#[test] +fn minimal() { + let lock = GPU_LOCK.lock().unwrap(); + let driver: Box = get_driver(); + engine_minimal::( + &format!( + "{}/tests/data/cuda/test_gpu_minimal.json", + env!("CARGO_MANIFEST_DIR") + ), + MemoryResource::Shared { + id: 0, + size: (1 << 30), + }, + driver, + vec![ComputeResource::GPU(7, 1, 2)], + ); + drop(lock); +} + +#[test] +fn minimal_matmul() { + let lock = GPU_LOCK.lock().unwrap(); + let filename = &format!( + "{}/tests/data/cuda/test_gpu_matmul.json", + env!("CARGO_MANIFEST_DIR") + ); + let dom_init = MemoryResource::Shared { + id: 0, + size: (1 << 30), + }; + let driver: Box = get_driver(); + let drv_init = vec![ComputeResource::GPU(7, 2, 2)]; + let (mut function_context, 
config, queue) = + prepare_engine_and_function::(filename, dom_init, &driver, drv_init); + + let n = [4 as i32; 1]; + let a_size: usize = 4 * 4 * (32 / 8); + let b_size = 4 * 4 * (32 / 8); + let c_size = 4 * 4 * (32 / 8); + + let mut a_slice: Vec = vec![0.0; 4 * 4]; + let mut b_slice: Vec = vec![0.0; 4 * 4]; + for i in 0..16 { + a_slice[i] = i as f32; + b_slice[i] = (i as f32) * 2.0; + } + + let a_offset = function_context + .get_free_space_and_write_slice(&a_slice) + .expect("Should have space"); + let b_offset = function_context + .get_free_space_and_write_slice(&b_slice) + .expect("Should have space"); + + function_context.content.push(Some(DataSet { + ident: "A".to_string(), + buffers: vec![DataItem { + ident: "".to_string(), + data: Position { + offset: a_offset as usize, + size: a_size, + }, + key: 0, + }], + })); + function_context.content.push(Some(DataSet { + ident: "B".to_string(), + buffers: vec![DataItem { + ident: "".to_string(), + data: Position { + offset: b_offset as usize, + size: b_size, + }, + key: 0, + }], + })); + let archive = Box::leak(Box::new(Archive::init(ArchiveInit { + #[cfg(feature = "timestamp")] + timestamp_count: 1000, + }))); + + let mut recorder = archive.get_recorder().unwrap(); + recorder + .record(RecordPoint::TransferEnd) + .expect("Should have properly initialized recorder state"); + let promise = queue.enqueu(WorkToDo::FunctionArguments { + config, + context: function_context, + output_sets: Arc::new(vec![String::from("C")]), + recorder: recorder.get_sub_recorder().unwrap(), + }); + queue.enqueu(WorkToDo::Shutdown()); + let result_context = tokio::runtime::Builder::new_current_thread() + .build() + .unwrap() + .block_on(promise) + .expect("Engine should run ok with basic function") + .get_context(); + recorder + .record(RecordPoint::FutureReturn) + .expect("Should have properly advanced recorder state"); + let output_item = result_context.content[0] + .as_ref() + .expect("Set should be present"); + let position = 
output_item.buffers[0].data; + assert_eq!(c_size, position.size, "Checking for size of output"); + let mut read_buffer = vec![0f32; position.size / (32 / 8)]; + result_context + .context + .read(position.offset, &mut read_buffer) + .expect("Should succeed in reading"); + let expected = vec![ + 112f32, 124f32, 136f32, 148f32, 304f32, 348f32, 392f32, 436f32, 496f32, 572f32, 648f32, + 724f32, 688f32, 796f32, 904f32, 1012f32, + ]; + for (should, is) in expected.iter().zip(read_buffer.iter()) { + assert_eq!(should, is, "Checking final result"); + } + drop(lock); +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests/compiled_tests.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests/compiled_tests.rs new file mode 100644 index 00000000..7450f59c --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests/compiled_tests.rs @@ -0,0 +1,176 @@ +use crate::{ + function_driver::compute_driver::gpu::gpu_tests::{ + cuda_tests::load_models::*, + get_driver, + tests_utils::{ + compare_result, execute_test, get_result, get_resulti32, get_resulti64, setup_test, + }, + GPU_LOCK, + }, + memory_domain::Context, +}; +use std::{collections::HashMap, sync::Arc}; + +#[test] +fn test_all() { + test_model("simple", true); + test_model("double_matmul", true); + test_model("resnet18", true); + test_model("rnn", true); + test_model("lstm", true); + test_model("vit_b_16", true); + test_model("bert", true); +} + +fn get_function(model_name: &str) -> Option (usize, String, Vec, Context)> { + let mut methods: HashMap<_, fn(Context) -> (usize, String, Vec, Context)> = HashMap::new(); + + methods.insert("simple", load_simple); + methods.insert("double_matmul", load_double_matmul); + methods.insert("resnet18", load_resnet18); + methods.insert("rnn", load_rnn); + methods.insert("lstm", load_lstm); + methods.insert("vit_b_16", load_vit_b_16); + methods.insert("bert", load_bert); + 
methods.insert("llama", load_llama); + methods.insert("llama_kv", load_llama_kv); + + methods.get(model_name).copied() +} + +fn test_model(model_name: &str, asserts: bool) { + let lock = GPU_LOCK.lock().unwrap(); + let filename = &format!( + "{}/tests/data/cuda/test_gpu_{}.json", + env!("CARGO_MANIFEST_DIR"), + model_name + ); + let (function_context, config, queue) = setup_test(&filename); + let load_function = get_function(model_name).unwrap_or_else(|| { + panic!("Model name \"{model_name}\" not recognized. Add it to the methods hash map.") + }); + let (output_size, output_name, expected, function_context) = load_function(function_context); + let result_context = execute_test(function_context, config, queue, &output_name); + let read_buffer = get_result(result_context, output_size, asserts); + compare_result(expected, read_buffer, asserts); + drop(lock); +} + +#[test] +fn simple() { + test_model("simple", true); +} + +#[test] +fn double_matmul() { + test_model("double_matmul", true); +} + +#[test] +fn resnet18() { + test_model("resnet18", true); +} + +#[test] +fn rnn() { + test_model("rnn", true); +} + +#[test] +fn lstm() { + test_model("lstm", true); +} + +#[test] +fn vit_b_16() { + test_model("vit_b_16", true); +} + +#[test] +fn bert() { + test_model("bert", true); +} + +#[test] +fn one_llama() { + test_model("llama", false); +} + +#[test] +fn full_llama() { + let lock = GPU_LOCK.lock().unwrap(); + let filename = &format!( + "{}/tests/data/cuda/test_gpu_llama-full.json", + env!("CARGO_MANIFEST_DIR") + ); + let (function_context, config, queue) = setup_test(&filename); + let (mut output_size, mut output_name, expected, function_context) = + load_llama(function_context); + output_size = 1024; + output_name = "token_ids".to_string(); + + let result_context = execute_test(function_context, config, queue, &output_name); + + let read_buffer = get_resulti64(result_context, output_size, false); + println!("{:?}", read_buffer); + + drop(lock); +} + +#[test] +fn 
one_kv_llama() { + test_model("llama_kv", true); +} + +#[test] +fn hand_kv_llama() { + let lock = GPU_LOCK.lock().unwrap(); + let filename = &format!( + "{}/tests/data/cuda/test_gpu_llama_kv.json", + env!("CARGO_MANIFEST_DIR") + ); + let (function_context, config, queue) = setup_test(&filename); + let (mut output_size, mut output_name, expected, function_context) = + load_llama_kv(function_context); + output_size = 16777216; // 1024; + output_name = "b41".to_string(); // "token_ids".to_string(); + + let result_context = execute_test(function_context, config, queue, &output_name); + + // let read_buffer = get_resulti64(result_context, output_size, false); + let read_buffer = get_result(result_context, output_size, false); + for i in 0..16777216 / 4 { + if read_buffer[i] != 0.0 { + println!("{} - {:?}, ", i, read_buffer[i]); + } + } + + drop(lock); +} + +#[test] +fn full_kv_llama() { + let lock = GPU_LOCK.lock().unwrap(); + let filename = &format!( + "{}/tests/data/cuda/test_gpu_llama_kv-full.json", + env!("CARGO_MANIFEST_DIR") + ); + let (function_context, config, queue) = setup_test(&filename); + let (mut output_size, mut output_name, expected, function_context) = + full_load_llama_kv(function_context); + output_size = 1024; + output_name = "token_ids".to_string(); + + let result_context = execute_test(function_context, config, queue, &output_name); + + let read_buffer = get_resulti64(result_context, output_size, false); + println!("{:?}", read_buffer); + /*let read_buffer = get_result(result_context, output_size, false); + for i in 5000..5100 { // 16777216 / 4 { + if read_buffer[i] != 0.0 { + println!("{} - {:?}, ", i, read_buffer[i]); + } + }*/ + + drop(lock); +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests/load_models.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests/load_models.rs new file mode 100644 index 00000000..1aa9bfc6 --- /dev/null +++ 
b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/cuda_tests/load_models.rs @@ -0,0 +1,3493 @@ +use crate::{ + function_driver::compute_driver::gpu::gpu_tests::load_utils::{ + add_buffer, add_empty_buffer, add_number, read_tensor_from_file, + }, + memory_domain::Context, +}; + +pub fn load_bert(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/bert/"; + + add_buffer("input_ids", 1024, params_path, &mut function_context); + add_buffer("attention_mask", 1024, params_path, &mut function_context); + add_buffer("p0", 93763584, params_path, &mut function_context); + add_buffer("p1", 393216, params_path, &mut function_context); + add_buffer("p2", 3072, params_path, &mut function_context); + add_buffer("p3", 3072, params_path, &mut function_context); + add_buffer("p4", 2359296, params_path, &mut function_context); + add_buffer("p5", 3072, params_path, &mut function_context); + add_buffer("p6", 4, params_path, &mut function_context); + add_buffer("p7", 2359296, params_path, &mut function_context); + add_buffer("p8", 3072, params_path, &mut function_context); + add_buffer("p9", 4, params_path, &mut function_context); + add_buffer("p10", 2359296, params_path, &mut function_context); + add_buffer("p11", 3072, params_path, &mut function_context); + add_buffer("p12", 2359296, params_path, &mut function_context); + add_buffer("p13", 3072, params_path, &mut function_context); + add_buffer("p14", 3072, params_path, &mut function_context); + add_buffer("p15", 3072, params_path, &mut function_context); + add_buffer("p16", 9437184, params_path, &mut function_context); + add_buffer("p17", 12288, params_path, &mut function_context); + add_buffer("p18", 9437184, params_path, &mut function_context); + add_buffer("p19", 3072, params_path, &mut function_context); + add_buffer("p20", 3072, params_path, &mut function_context); + add_buffer("p21", 3072, params_path, &mut 
function_context); + add_buffer("p22", 2359296, params_path, &mut function_context); + add_buffer("p23", 3072, params_path, &mut function_context); + add_buffer("p24", 4, params_path, &mut function_context); + add_buffer("p25", 2359296, params_path, &mut function_context); + add_buffer("p26", 3072, params_path, &mut function_context); + add_buffer("p27", 4, params_path, &mut function_context); + add_buffer("p28", 2359296, params_path, &mut function_context); + add_buffer("p29", 3072, params_path, &mut function_context); + add_buffer("p30", 2359296, params_path, &mut function_context); + add_buffer("p31", 3072, params_path, &mut function_context); + add_buffer("p32", 3072, params_path, &mut function_context); + add_buffer("p33", 3072, params_path, &mut function_context); + add_buffer("p34", 9437184, params_path, &mut function_context); + add_buffer("p35", 12288, params_path, &mut function_context); + add_buffer("p36", 9437184, params_path, &mut function_context); + add_buffer("p37", 3072, params_path, &mut function_context); + add_buffer("p38", 3072, params_path, &mut function_context); + add_buffer("p39", 3072, params_path, &mut function_context); + add_buffer("p40", 2359296, params_path, &mut function_context); + add_buffer("p41", 3072, params_path, &mut function_context); + add_buffer("p42", 4, params_path, &mut function_context); + add_buffer("p43", 2359296, params_path, &mut function_context); + add_buffer("p44", 3072, params_path, &mut function_context); + add_buffer("p45", 4, params_path, &mut function_context); + add_buffer("p46", 2359296, params_path, &mut function_context); + add_buffer("p47", 3072, params_path, &mut function_context); + add_buffer("p48", 2359296, params_path, &mut function_context); + add_buffer("p49", 3072, params_path, &mut function_context); + add_buffer("p50", 3072, params_path, &mut function_context); + add_buffer("p51", 3072, params_path, &mut function_context); + add_buffer("p52", 9437184, params_path, &mut function_context); + 
add_buffer("p53", 12288, params_path, &mut function_context); + add_buffer("p54", 9437184, params_path, &mut function_context); + add_buffer("p55", 3072, params_path, &mut function_context); + add_buffer("p56", 3072, params_path, &mut function_context); + add_buffer("p57", 3072, params_path, &mut function_context); + add_buffer("p58", 2359296, params_path, &mut function_context); + add_buffer("p59", 3072, params_path, &mut function_context); + add_buffer("p60", 4, params_path, &mut function_context); + add_buffer("p61", 2359296, params_path, &mut function_context); + add_buffer("p62", 3072, params_path, &mut function_context); + add_buffer("p63", 4, params_path, &mut function_context); + add_buffer("p64", 2359296, params_path, &mut function_context); + add_buffer("p65", 3072, params_path, &mut function_context); + add_buffer("p66", 2359296, params_path, &mut function_context); + add_buffer("p67", 3072, params_path, &mut function_context); + add_buffer("p68", 3072, params_path, &mut function_context); + add_buffer("p69", 3072, params_path, &mut function_context); + add_buffer("p70", 9437184, params_path, &mut function_context); + add_buffer("p71", 12288, params_path, &mut function_context); + add_buffer("p72", 9437184, params_path, &mut function_context); + add_buffer("p73", 3072, params_path, &mut function_context); + add_buffer("p74", 3072, params_path, &mut function_context); + add_buffer("p75", 3072, params_path, &mut function_context); + add_buffer("p76", 2359296, params_path, &mut function_context); + add_buffer("p77", 3072, params_path, &mut function_context); + add_buffer("p78", 4, params_path, &mut function_context); + add_buffer("p79", 2359296, params_path, &mut function_context); + add_buffer("p80", 3072, params_path, &mut function_context); + add_buffer("p81", 4, params_path, &mut function_context); + add_buffer("p82", 2359296, params_path, &mut function_context); + add_buffer("p83", 3072, params_path, &mut function_context); + add_buffer("p84", 2359296, 
params_path, &mut function_context); + add_buffer("p85", 3072, params_path, &mut function_context); + add_buffer("p86", 3072, params_path, &mut function_context); + add_buffer("p87", 3072, params_path, &mut function_context); + add_buffer("p88", 9437184, params_path, &mut function_context); + add_buffer("p89", 12288, params_path, &mut function_context); + add_buffer("p90", 9437184, params_path, &mut function_context); + add_buffer("p91", 3072, params_path, &mut function_context); + add_buffer("p92", 3072, params_path, &mut function_context); + add_buffer("p93", 3072, params_path, &mut function_context); + add_buffer("p94", 2359296, params_path, &mut function_context); + add_buffer("p95", 3072, params_path, &mut function_context); + add_buffer("p96", 4, params_path, &mut function_context); + add_buffer("p97", 2359296, params_path, &mut function_context); + add_buffer("p98", 3072, params_path, &mut function_context); + add_buffer("p99", 4, params_path, &mut function_context); + add_buffer("p100", 2359296, params_path, &mut function_context); + add_buffer("p101", 3072, params_path, &mut function_context); + add_buffer("p102", 2359296, params_path, &mut function_context); + add_buffer("p103", 3072, params_path, &mut function_context); + add_buffer("p104", 3072, params_path, &mut function_context); + add_buffer("p105", 3072, params_path, &mut function_context); + add_buffer("p106", 9437184, params_path, &mut function_context); + add_buffer("p107", 12288, params_path, &mut function_context); + add_buffer("p108", 9437184, params_path, &mut function_context); + add_buffer("p109", 3072, params_path, &mut function_context); + add_buffer("p110", 3072, params_path, &mut function_context); + add_buffer("p111", 3072, params_path, &mut function_context); + add_buffer("p112", 2359296, params_path, &mut function_context); + add_buffer("p113", 3072, params_path, &mut function_context); + add_buffer("p114", 4, params_path, &mut function_context); + add_buffer("p115", 2359296, 
params_path, &mut function_context); + add_buffer("p116", 3072, params_path, &mut function_context); + add_buffer("p117", 4, params_path, &mut function_context); + add_buffer("p118", 2359296, params_path, &mut function_context); + add_buffer("p119", 3072, params_path, &mut function_context); + add_buffer("p120", 2359296, params_path, &mut function_context); + add_buffer("p121", 3072, params_path, &mut function_context); + add_buffer("p122", 3072, params_path, &mut function_context); + add_buffer("p123", 3072, params_path, &mut function_context); + add_buffer("p124", 9437184, params_path, &mut function_context); + add_buffer("p125", 12288, params_path, &mut function_context); + add_buffer("p126", 9437184, params_path, &mut function_context); + add_buffer("p127", 3072, params_path, &mut function_context); + add_buffer("p128", 3072, params_path, &mut function_context); + add_buffer("p129", 3072, params_path, &mut function_context); + add_buffer("p130", 2359296, params_path, &mut function_context); + add_buffer("p131", 3072, params_path, &mut function_context); + add_buffer("p132", 4, params_path, &mut function_context); + add_buffer("p133", 2359296, params_path, &mut function_context); + add_buffer("p134", 3072, params_path, &mut function_context); + add_buffer("p135", 4, params_path, &mut function_context); + add_buffer("p136", 2359296, params_path, &mut function_context); + add_buffer("p137", 3072, params_path, &mut function_context); + add_buffer("p138", 2359296, params_path, &mut function_context); + add_buffer("p139", 3072, params_path, &mut function_context); + add_buffer("p140", 3072, params_path, &mut function_context); + add_buffer("p141", 3072, params_path, &mut function_context); + add_buffer("p142", 9437184, params_path, &mut function_context); + add_buffer("p143", 12288, params_path, &mut function_context); + add_buffer("p144", 9437184, params_path, &mut function_context); + add_buffer("p145", 3072, params_path, &mut function_context); + 
add_buffer("p146", 3072, params_path, &mut function_context); + add_buffer("p147", 3072, params_path, &mut function_context); + add_buffer("p148", 2359296, params_path, &mut function_context); + add_buffer("p149", 3072, params_path, &mut function_context); + add_buffer("p150", 4, params_path, &mut function_context); + add_buffer("p151", 2359296, params_path, &mut function_context); + add_buffer("p152", 3072, params_path, &mut function_context); + add_buffer("p153", 4, params_path, &mut function_context); + add_buffer("p154", 2359296, params_path, &mut function_context); + add_buffer("p155", 3072, params_path, &mut function_context); + add_buffer("p156", 2359296, params_path, &mut function_context); + add_buffer("p157", 3072, params_path, &mut function_context); + add_buffer("p158", 3072, params_path, &mut function_context); + add_buffer("p159", 3072, params_path, &mut function_context); + add_buffer("p160", 9437184, params_path, &mut function_context); + add_buffer("p161", 12288, params_path, &mut function_context); + add_buffer("p162", 9437184, params_path, &mut function_context); + add_buffer("p163", 3072, params_path, &mut function_context); + add_buffer("p164", 3072, params_path, &mut function_context); + add_buffer("p165", 3072, params_path, &mut function_context); + add_buffer("p166", 2359296, params_path, &mut function_context); + add_buffer("p167", 3072, params_path, &mut function_context); + add_buffer("p168", 4, params_path, &mut function_context); + add_buffer("p169", 2359296, params_path, &mut function_context); + add_buffer("p170", 3072, params_path, &mut function_context); + add_buffer("p171", 4, params_path, &mut function_context); + add_buffer("p172", 2359296, params_path, &mut function_context); + add_buffer("p173", 3072, params_path, &mut function_context); + add_buffer("p174", 2359296, params_path, &mut function_context); + add_buffer("p175", 3072, params_path, &mut function_context); + add_buffer("p176", 3072, params_path, &mut 
function_context); + add_buffer("p177", 3072, params_path, &mut function_context); + add_buffer("p178", 9437184, params_path, &mut function_context); + add_buffer("p179", 12288, params_path, &mut function_context); + add_buffer("p180", 9437184, params_path, &mut function_context); + add_buffer("p181", 3072, params_path, &mut function_context); + add_buffer("p182", 3072, params_path, &mut function_context); + add_buffer("p183", 3072, params_path, &mut function_context); + add_buffer("p184", 2359296, params_path, &mut function_context); + add_buffer("p185", 3072, params_path, &mut function_context); + add_buffer("p186", 4, params_path, &mut function_context); + add_buffer("p187", 2359296, params_path, &mut function_context); + add_buffer("p188", 3072, params_path, &mut function_context); + add_buffer("p189", 4, params_path, &mut function_context); + add_buffer("p190", 2359296, params_path, &mut function_context); + add_buffer("p191", 3072, params_path, &mut function_context); + add_buffer("p192", 2359296, params_path, &mut function_context); + add_buffer("p193", 3072, params_path, &mut function_context); + add_buffer("p194", 3072, params_path, &mut function_context); + add_buffer("p195", 3072, params_path, &mut function_context); + add_buffer("p196", 9437184, params_path, &mut function_context); + add_buffer("p197", 12288, params_path, &mut function_context); + add_buffer("p198", 9437184, params_path, &mut function_context); + add_buffer("p199", 3072, params_path, &mut function_context); + add_buffer("p200", 3072, params_path, &mut function_context); + add_buffer("p201", 3072, params_path, &mut function_context); + add_buffer("p202", 2359296, params_path, &mut function_context); + add_buffer("p203", 3072, params_path, &mut function_context); + add_buffer("p204", 4, params_path, &mut function_context); + add_buffer("p205", 2359296, params_path, &mut function_context); + add_buffer("p206", 3072, params_path, &mut function_context); + add_buffer("p207", 4, params_path, 
&mut function_context); + add_buffer("p208", 2359296, params_path, &mut function_context); + add_buffer("p209", 3072, params_path, &mut function_context); + add_buffer("p210", 2359296, params_path, &mut function_context); + add_buffer("p211", 3072, params_path, &mut function_context); + add_buffer("p212", 3072, params_path, &mut function_context); + add_buffer("p213", 3072, params_path, &mut function_context); + add_buffer("p214", 9437184, params_path, &mut function_context); + add_buffer("p215", 12288, params_path, &mut function_context); + add_buffer("p216", 9437184, params_path, &mut function_context); + add_buffer("p217", 3072, params_path, &mut function_context); + add_buffer("p218", 3072, params_path, &mut function_context); + add_buffer("p219", 3072, params_path, &mut function_context); + add_buffer("p220", 2359296, params_path, &mut function_context); + add_buffer("p221", 3072, params_path, &mut function_context); + add_buffer("p222", 3072, params_path, &mut function_context); + add_buffer("p223", 3072, params_path, &mut function_context); + add_buffer("p224", 93763584, params_path, &mut function_context); + add_buffer("p225", 122088, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 15627264; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_rnn(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/rnn/"; + + add_buffer("input", 3136, params_path, &mut function_context); + add_buffer("p0", 14336, params_path, &mut function_context); + add_buffer("p1", 512, params_path, &mut function_context); + add_buffer("p2", 512, params_path, &mut function_context); + add_buffer("p3", 65536, params_path, &mut function_context); + add_buffer("p4", 512, params_path, &mut function_context); + 
add_buffer("p5", 65536, params_path, &mut function_context); + add_buffer("p6", 512, params_path, &mut function_context); + add_buffer("p7", 512, params_path, &mut function_context); + add_buffer("p8", 65536, params_path, &mut function_context); + add_buffer("p9", 512, params_path, &mut function_context); + add_buffer("p10", 65536, params_path, &mut function_context); + add_buffer("p11", 512, params_path, &mut function_context); + add_buffer("p12", 512, params_path, &mut function_context); + add_buffer("p13", 65536, params_path, &mut function_context); + add_buffer("p14", 512, params_path, &mut function_context); + add_buffer("p15", 65536, params_path, &mut function_context); + add_buffer("p16", 512, params_path, &mut function_context); + add_buffer("p17", 512, params_path, &mut function_context); + add_buffer("p18", 65536, params_path, &mut function_context); + add_buffer("p19", 512, params_path, &mut function_context); + add_buffer("p20", 65536, params_path, &mut function_context); + add_buffer("p21", 512, params_path, &mut function_context); + add_buffer("p22", 512, params_path, &mut function_context); + add_buffer("p23", 65536, params_path, &mut function_context); + add_buffer("p24", 512, params_path, &mut function_context); + add_buffer("p25", 5120, params_path, &mut function_context); + add_buffer("p26", 40, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 512; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_lstm(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/lstm/"; + + add_buffer("input", 3136, params_path, &mut function_context); + add_buffer("p0", 512, params_path, &mut function_context); + add_buffer("p1", 319488, params_path, &mut function_context); + add_buffer("p2", 2048, 
params_path, &mut function_context); + add_buffer("p3", 512, params_path, &mut function_context); + add_buffer("p4", 319488, params_path, &mut function_context); + add_buffer("p5", 2048, params_path, &mut function_context); + add_buffer("p6", 319488, params_path, &mut function_context); + add_buffer("p7", 2048, params_path, &mut function_context); + add_buffer("p8", 319488, params_path, &mut function_context); + add_buffer("p9", 2048, params_path, &mut function_context); + add_buffer("p10", 319488, params_path, &mut function_context); + add_buffer("p11", 2048, params_path, &mut function_context); + add_buffer("p12", 319488, params_path, &mut function_context); + add_buffer("p13", 2048, params_path, &mut function_context); + add_buffer("p14", 319488, params_path, &mut function_context); + add_buffer("p15", 2048, params_path, &mut function_context); + add_buffer("p16", 319488, params_path, &mut function_context); + add_buffer("p17", 2048, params_path, &mut function_context); + add_buffer("p18", 319488, params_path, &mut function_context); + add_buffer("p19", 2048, params_path, &mut function_context); + add_buffer("p20", 319488, params_path, &mut function_context); + add_buffer("p21", 2048, params_path, &mut function_context); + add_buffer("p22", 319488, params_path, &mut function_context); + add_buffer("p23", 2048, params_path, &mut function_context); + add_buffer("p24", 319488, params_path, &mut function_context); + add_buffer("p25", 2048, params_path, &mut function_context); + add_buffer("p26", 319488, params_path, &mut function_context); + add_buffer("p27", 2048, params_path, &mut function_context); + add_buffer("p28", 319488, params_path, &mut function_context); + add_buffer("p29", 2048, params_path, &mut function_context); + add_buffer("p30", 319488, params_path, &mut function_context); + add_buffer("p31", 2048, params_path, &mut function_context); + add_buffer("p32", 319488, params_path, &mut function_context); + add_buffer("p33", 2048, params_path, &mut 
function_context); + add_buffer("p34", 319488, params_path, &mut function_context); + add_buffer("p35", 2048, params_path, &mut function_context); + add_buffer("p36", 319488, params_path, &mut function_context); + add_buffer("p37", 2048, params_path, &mut function_context); + add_buffer("p38", 319488, params_path, &mut function_context); + add_buffer("p39", 2048, params_path, &mut function_context); + add_buffer("p40", 319488, params_path, &mut function_context); + add_buffer("p41", 2048, params_path, &mut function_context); + add_buffer("p42", 319488, params_path, &mut function_context); + add_buffer("p43", 2048, params_path, &mut function_context); + add_buffer("p44", 319488, params_path, &mut function_context); + add_buffer("p45", 2048, params_path, &mut function_context); + add_buffer("p46", 319488, params_path, &mut function_context); + add_buffer("p47", 2048, params_path, &mut function_context); + add_buffer("p48", 319488, params_path, &mut function_context); + add_buffer("p49", 2048, params_path, &mut function_context); + add_buffer("p50", 319488, params_path, &mut function_context); + add_buffer("p51", 2048, params_path, &mut function_context); + add_buffer("p52", 319488, params_path, &mut function_context); + add_buffer("p53", 2048, params_path, &mut function_context); + add_buffer("p54", 319488, params_path, &mut function_context); + add_buffer("p55", 2048, params_path, &mut function_context); + add_buffer("p56", 319488, params_path, &mut function_context); + add_buffer("p57", 2048, params_path, &mut function_context); + add_buffer("p58", 512, params_path, &mut function_context); + add_buffer("p59", 524288, params_path, &mut function_context); + add_buffer("p60", 2048, params_path, &mut function_context); + add_buffer("p61", 512, params_path, &mut function_context); + add_buffer("p62", 524288, params_path, &mut function_context); + add_buffer("p63", 2048, params_path, &mut function_context); + add_buffer("p64", 524288, params_path, &mut function_context); 
+ add_buffer("p65", 2048, params_path, &mut function_context); + add_buffer("p66", 524288, params_path, &mut function_context); + add_buffer("p67", 2048, params_path, &mut function_context); + add_buffer("p68", 524288, params_path, &mut function_context); + add_buffer("p69", 2048, params_path, &mut function_context); + add_buffer("p70", 524288, params_path, &mut function_context); + add_buffer("p71", 2048, params_path, &mut function_context); + add_buffer("p72", 524288, params_path, &mut function_context); + add_buffer("p73", 2048, params_path, &mut function_context); + add_buffer("p74", 524288, params_path, &mut function_context); + add_buffer("p75", 2048, params_path, &mut function_context); + add_buffer("p76", 524288, params_path, &mut function_context); + add_buffer("p77", 2048, params_path, &mut function_context); + add_buffer("p78", 524288, params_path, &mut function_context); + add_buffer("p79", 2048, params_path, &mut function_context); + add_buffer("p80", 524288, params_path, &mut function_context); + add_buffer("p81", 2048, params_path, &mut function_context); + add_buffer("p82", 524288, params_path, &mut function_context); + add_buffer("p83", 2048, params_path, &mut function_context); + add_buffer("p84", 524288, params_path, &mut function_context); + add_buffer("p85", 2048, params_path, &mut function_context); + add_buffer("p86", 524288, params_path, &mut function_context); + add_buffer("p87", 2048, params_path, &mut function_context); + add_buffer("p88", 524288, params_path, &mut function_context); + add_buffer("p89", 2048, params_path, &mut function_context); + add_buffer("p90", 524288, params_path, &mut function_context); + add_buffer("p91", 2048, params_path, &mut function_context); + add_buffer("p92", 524288, params_path, &mut function_context); + add_buffer("p93", 2048, params_path, &mut function_context); + add_buffer("p94", 524288, params_path, &mut function_context); + add_buffer("p95", 2048, params_path, &mut function_context); + 
add_buffer("p96", 524288, params_path, &mut function_context); + add_buffer("p97", 2048, params_path, &mut function_context); + add_buffer("p98", 524288, params_path, &mut function_context); + add_buffer("p99", 2048, params_path, &mut function_context); + add_buffer("p100", 524288, params_path, &mut function_context); + add_buffer("p101", 2048, params_path, &mut function_context); + add_buffer("p102", 524288, params_path, &mut function_context); + add_buffer("p103", 2048, params_path, &mut function_context); + add_buffer("p104", 524288, params_path, &mut function_context); + add_buffer("p105", 2048, params_path, &mut function_context); + add_buffer("p106", 524288, params_path, &mut function_context); + add_buffer("p107", 2048, params_path, &mut function_context); + add_buffer("p108", 524288, params_path, &mut function_context); + add_buffer("p109", 2048, params_path, &mut function_context); + add_buffer("p110", 524288, params_path, &mut function_context); + add_buffer("p111", 2048, params_path, &mut function_context); + add_buffer("p112", 524288, params_path, &mut function_context); + add_buffer("p113", 2048, params_path, &mut function_context); + add_buffer("p114", 524288, params_path, &mut function_context); + add_buffer("p115", 2048, params_path, &mut function_context); + add_buffer("p116", 512, params_path, &mut function_context); + add_buffer("p117", 524288, params_path, &mut function_context); + add_buffer("p118", 2048, params_path, &mut function_context); + add_buffer("p119", 512, params_path, &mut function_context); + add_buffer("p120", 524288, params_path, &mut function_context); + add_buffer("p121", 2048, params_path, &mut function_context); + add_buffer("p122", 524288, params_path, &mut function_context); + add_buffer("p123", 2048, params_path, &mut function_context); + add_buffer("p124", 524288, params_path, &mut function_context); + add_buffer("p125", 2048, params_path, &mut function_context); + add_buffer("p126", 524288, params_path, &mut 
function_context); + add_buffer("p127", 2048, params_path, &mut function_context); + add_buffer("p128", 524288, params_path, &mut function_context); + add_buffer("p129", 2048, params_path, &mut function_context); + add_buffer("p130", 524288, params_path, &mut function_context); + add_buffer("p131", 2048, params_path, &mut function_context); + add_buffer("p132", 524288, params_path, &mut function_context); + add_buffer("p133", 2048, params_path, &mut function_context); + add_buffer("p134", 524288, params_path, &mut function_context); + add_buffer("p135", 2048, params_path, &mut function_context); + add_buffer("p136", 524288, params_path, &mut function_context); + add_buffer("p137", 2048, params_path, &mut function_context); + add_buffer("p138", 524288, params_path, &mut function_context); + add_buffer("p139", 2048, params_path, &mut function_context); + add_buffer("p140", 524288, params_path, &mut function_context); + add_buffer("p141", 2048, params_path, &mut function_context); + add_buffer("p142", 524288, params_path, &mut function_context); + add_buffer("p143", 2048, params_path, &mut function_context); + add_buffer("p144", 524288, params_path, &mut function_context); + add_buffer("p145", 2048, params_path, &mut function_context); + add_buffer("p146", 524288, params_path, &mut function_context); + add_buffer("p147", 2048, params_path, &mut function_context); + add_buffer("p148", 524288, params_path, &mut function_context); + add_buffer("p149", 2048, params_path, &mut function_context); + add_buffer("p150", 524288, params_path, &mut function_context); + add_buffer("p151", 2048, params_path, &mut function_context); + add_buffer("p152", 524288, params_path, &mut function_context); + add_buffer("p153", 2048, params_path, &mut function_context); + add_buffer("p154", 524288, params_path, &mut function_context); + add_buffer("p155", 2048, params_path, &mut function_context); + add_buffer("p156", 524288, params_path, &mut function_context); + add_buffer("p157", 2048, 
params_path, &mut function_context); + add_buffer("p158", 524288, params_path, &mut function_context); + add_buffer("p159", 2048, params_path, &mut function_context); + add_buffer("p160", 524288, params_path, &mut function_context); + add_buffer("p161", 2048, params_path, &mut function_context); + add_buffer("p162", 524288, params_path, &mut function_context); + add_buffer("p163", 2048, params_path, &mut function_context); + add_buffer("p164", 524288, params_path, &mut function_context); + add_buffer("p165", 2048, params_path, &mut function_context); + add_buffer("p166", 524288, params_path, &mut function_context); + add_buffer("p167", 2048, params_path, &mut function_context); + add_buffer("p168", 524288, params_path, &mut function_context); + add_buffer("p169", 2048, params_path, &mut function_context); + add_buffer("p170", 524288, params_path, &mut function_context); + add_buffer("p171", 2048, params_path, &mut function_context); + add_buffer("p172", 524288, params_path, &mut function_context); + add_buffer("p173", 2048, params_path, &mut function_context); + add_buffer("p174", 512, params_path, &mut function_context); + add_buffer("p175", 524288, params_path, &mut function_context); + add_buffer("p176", 2048, params_path, &mut function_context); + add_buffer("p177", 512, params_path, &mut function_context); + add_buffer("p178", 524288, params_path, &mut function_context); + add_buffer("p179", 2048, params_path, &mut function_context); + add_buffer("p180", 524288, params_path, &mut function_context); + add_buffer("p181", 2048, params_path, &mut function_context); + add_buffer("p182", 524288, params_path, &mut function_context); + add_buffer("p183", 2048, params_path, &mut function_context); + add_buffer("p184", 524288, params_path, &mut function_context); + add_buffer("p185", 2048, params_path, &mut function_context); + add_buffer("p186", 524288, params_path, &mut function_context); + add_buffer("p187", 2048, params_path, &mut function_context); + 
add_buffer("p188", 524288, params_path, &mut function_context); + add_buffer("p189", 2048, params_path, &mut function_context); + add_buffer("p190", 524288, params_path, &mut function_context); + add_buffer("p191", 2048, params_path, &mut function_context); + add_buffer("p192", 524288, params_path, &mut function_context); + add_buffer("p193", 2048, params_path, &mut function_context); + add_buffer("p194", 524288, params_path, &mut function_context); + add_buffer("p195", 2048, params_path, &mut function_context); + add_buffer("p196", 524288, params_path, &mut function_context); + add_buffer("p197", 2048, params_path, &mut function_context); + add_buffer("p198", 524288, params_path, &mut function_context); + add_buffer("p199", 2048, params_path, &mut function_context); + add_buffer("p200", 524288, params_path, &mut function_context); + add_buffer("p201", 2048, params_path, &mut function_context); + add_buffer("p202", 524288, params_path, &mut function_context); + add_buffer("p203", 2048, params_path, &mut function_context); + add_buffer("p204", 524288, params_path, &mut function_context); + add_buffer("p205", 2048, params_path, &mut function_context); + add_buffer("p206", 524288, params_path, &mut function_context); + add_buffer("p207", 2048, params_path, &mut function_context); + add_buffer("p208", 524288, params_path, &mut function_context); + add_buffer("p209", 2048, params_path, &mut function_context); + add_buffer("p210", 524288, params_path, &mut function_context); + add_buffer("p211", 2048, params_path, &mut function_context); + add_buffer("p212", 524288, params_path, &mut function_context); + add_buffer("p213", 2048, params_path, &mut function_context); + add_buffer("p214", 524288, params_path, &mut function_context); + add_buffer("p215", 2048, params_path, &mut function_context); + add_buffer("p216", 524288, params_path, &mut function_context); + add_buffer("p217", 2048, params_path, &mut function_context); + add_buffer("p218", 524288, params_path, &mut 
function_context); + add_buffer("p219", 2048, params_path, &mut function_context); + add_buffer("p220", 524288, params_path, &mut function_context); + add_buffer("p221", 2048, params_path, &mut function_context); + add_buffer("p222", 524288, params_path, &mut function_context); + add_buffer("p223", 2048, params_path, &mut function_context); + add_buffer("p224", 524288, params_path, &mut function_context); + add_buffer("p225", 2048, params_path, &mut function_context); + add_buffer("p226", 524288, params_path, &mut function_context); + add_buffer("p227", 2048, params_path, &mut function_context); + add_buffer("p228", 524288, params_path, &mut function_context); + add_buffer("p229", 2048, params_path, &mut function_context); + add_buffer("p230", 524288, params_path, &mut function_context); + add_buffer("p231", 2048, params_path, &mut function_context); + add_buffer("p232", 512, params_path, &mut function_context); + add_buffer("p233", 524288, params_path, &mut function_context); + add_buffer("p234", 2048, params_path, &mut function_context); + add_buffer("p235", 512, params_path, &mut function_context); + add_buffer("p236", 524288, params_path, &mut function_context); + add_buffer("p237", 2048, params_path, &mut function_context); + add_buffer("p238", 524288, params_path, &mut function_context); + add_buffer("p239", 2048, params_path, &mut function_context); + add_buffer("p240", 524288, params_path, &mut function_context); + add_buffer("p241", 2048, params_path, &mut function_context); + add_buffer("p242", 524288, params_path, &mut function_context); + add_buffer("p243", 2048, params_path, &mut function_context); + add_buffer("p244", 524288, params_path, &mut function_context); + add_buffer("p245", 2048, params_path, &mut function_context); + add_buffer("p246", 524288, params_path, &mut function_context); + add_buffer("p247", 2048, params_path, &mut function_context); + add_buffer("p248", 524288, params_path, &mut function_context); + add_buffer("p249", 2048, 
params_path, &mut function_context); + add_buffer("p250", 524288, params_path, &mut function_context); + add_buffer("p251", 2048, params_path, &mut function_context); + add_buffer("p252", 524288, params_path, &mut function_context); + add_buffer("p253", 2048, params_path, &mut function_context); + add_buffer("p254", 524288, params_path, &mut function_context); + add_buffer("p255", 2048, params_path, &mut function_context); + add_buffer("p256", 524288, params_path, &mut function_context); + add_buffer("p257", 2048, params_path, &mut function_context); + add_buffer("p258", 524288, params_path, &mut function_context); + add_buffer("p259", 2048, params_path, &mut function_context); + add_buffer("p260", 524288, params_path, &mut function_context); + add_buffer("p261", 2048, params_path, &mut function_context); + add_buffer("p262", 524288, params_path, &mut function_context); + add_buffer("p263", 2048, params_path, &mut function_context); + add_buffer("p264", 524288, params_path, &mut function_context); + add_buffer("p265", 2048, params_path, &mut function_context); + add_buffer("p266", 524288, params_path, &mut function_context); + add_buffer("p267", 2048, params_path, &mut function_context); + add_buffer("p268", 524288, params_path, &mut function_context); + add_buffer("p269", 2048, params_path, &mut function_context); + add_buffer("p270", 524288, params_path, &mut function_context); + add_buffer("p271", 2048, params_path, &mut function_context); + add_buffer("p272", 524288, params_path, &mut function_context); + add_buffer("p273", 2048, params_path, &mut function_context); + add_buffer("p274", 524288, params_path, &mut function_context); + add_buffer("p275", 2048, params_path, &mut function_context); + add_buffer("p276", 524288, params_path, &mut function_context); + add_buffer("p277", 2048, params_path, &mut function_context); + add_buffer("p278", 524288, params_path, &mut function_context); + add_buffer("p279", 2048, params_path, &mut function_context); + 
add_buffer("p280", 524288, params_path, &mut function_context); + add_buffer("p281", 2048, params_path, &mut function_context); + add_buffer("p282", 524288, params_path, &mut function_context); + add_buffer("p283", 2048, params_path, &mut function_context); + add_buffer("p284", 524288, params_path, &mut function_context); + add_buffer("p285", 2048, params_path, &mut function_context); + add_buffer("p286", 524288, params_path, &mut function_context); + add_buffer("p287", 2048, params_path, &mut function_context); + add_buffer("p288", 524288, params_path, &mut function_context); + add_buffer("p289", 2048, params_path, &mut function_context); + add_buffer("p290", 5120, params_path, &mut function_context); + add_buffer("p291", 40, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 512; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_test(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/test/"; + + add_buffer("input", 28, params_path, &mut function_context); + add_buffer("p0", 16, params_path, &mut function_context); + add_buffer("p1", 196, params_path, &mut function_context); + add_buffer("p2", 28, params_path, &mut function_context); + add_buffer("p3", 16, params_path, &mut function_context); + add_buffer("p4", 16, params_path, &mut function_context); + add_buffer("p5", 16, params_path, &mut function_context); + add_buffer("p6", 16, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 28; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_llm(mut function_context: Context) -> (usize, String, Vec, Context) { + let 
params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/llm/"; + + add_buffer("token_ids", 1024, params_path, &mut function_context); + add_buffer("p0", 16, params_path, &mut function_context); + add_buffer("p1", 8, params_path, &mut function_context); + add_buffer("p2", 16, params_path, &mut function_context); + add_buffer("p3", 8, params_path, &mut function_context); + add_buffer("p4", 16, params_path, &mut function_context); + add_buffer("p5", 8, params_path, &mut function_context); + add_buffer("p6", 16, params_path, &mut function_context); + add_buffer("p7", 8, params_path, &mut function_context); + add_buffer("p8", 16, params_path, &mut function_context); + add_buffer("p9", 8, params_path, &mut function_context); + add_buffer("p10", 16, params_path, &mut function_context); + add_buffer("p11", 8, params_path, &mut function_context); + add_buffer("p12", 16, params_path, &mut function_context); + add_buffer("p13", 8, params_path, &mut function_context); + add_buffer("p14", 16, params_path, &mut function_context); + add_buffer("p15", 8, params_path, &mut function_context); + add_buffer("p16", 16, params_path, &mut function_context); + add_buffer("p17", 8, params_path, &mut function_context); + add_buffer("p18", 16, params_path, &mut function_context); + add_buffer("p19", 8, params_path, &mut function_context); + add_buffer("p20", 16, params_path, &mut function_context); + add_buffer("p21", 8, params_path, &mut function_context); + add_buffer("p22", 16, params_path, &mut function_context); + add_buffer("p23", 8, params_path, &mut function_context); + add_buffer("p24", 16, params_path, &mut function_context); + add_buffer("p25", 8, params_path, &mut function_context); + add_buffer("p26", 16, params_path, &mut function_context); + add_buffer("p27", 8, params_path, &mut function_context); + add_buffer("p28", 16, params_path, &mut function_context); + add_buffer("p29", 8, params_path, &mut function_context); + add_buffer("p30", 16, params_path, &mut 
function_context); + add_buffer("p31", 8, params_path, &mut function_context); + add_buffer("p32", 16, params_path, &mut function_context); + add_buffer("p33", 8, params_path, &mut function_context); + add_buffer("p34", 16, params_path, &mut function_context); + add_buffer("p35", 8, params_path, &mut function_context); + add_buffer("p36", 16, params_path, &mut function_context); + add_buffer("p37", 8, params_path, &mut function_context); + add_buffer("p38", 16, params_path, &mut function_context); + add_buffer("p39", 8, params_path, &mut function_context); + add_buffer("p40", 16, params_path, &mut function_context); + add_buffer("p41", 8, params_path, &mut function_context); + add_buffer("p42", 16, params_path, &mut function_context); + add_buffer("p43", 8, params_path, &mut function_context); + add_buffer("p44", 16, params_path, &mut function_context); + add_buffer("p45", 8, params_path, &mut function_context); + add_buffer("p46", 16, params_path, &mut function_context); + add_buffer("p47", 8, params_path, &mut function_context); + add_buffer("p48", 16, params_path, &mut function_context); + add_buffer("p49", 8, params_path, &mut function_context); + add_buffer("p50", 16, params_path, &mut function_context); + add_buffer("p51", 8, params_path, &mut function_context); + add_buffer("p52", 16, params_path, &mut function_context); + add_buffer("p53", 8, params_path, &mut function_context); + add_buffer("p54", 16, params_path, &mut function_context); + add_buffer("p55", 8, params_path, &mut function_context); + add_buffer("p56", 16, params_path, &mut function_context); + add_buffer("p57", 8, params_path, &mut function_context); + add_buffer("p58", 16, params_path, &mut function_context); + add_buffer("p59", 8, params_path, &mut function_context); + add_buffer("p60", 16, params_path, &mut function_context); + add_buffer("p61", 8, params_path, &mut function_context); + add_buffer("p62", 16, params_path, &mut function_context); + add_buffer("p63", 8, params_path, &mut 
function_context); + add_buffer("p64", 16, params_path, &mut function_context); + add_buffer("p65", 8, params_path, &mut function_context); + add_buffer("p66", 16, params_path, &mut function_context); + add_buffer("p67", 8, params_path, &mut function_context); + add_buffer("p68", 16, params_path, &mut function_context); + add_buffer("p69", 8, params_path, &mut function_context); + add_buffer("p70", 16, params_path, &mut function_context); + add_buffer("p71", 8, params_path, &mut function_context); + add_buffer("p72", 16, params_path, &mut function_context); + add_buffer("p73", 8, params_path, &mut function_context); + add_buffer("p74", 16, params_path, &mut function_context); + add_buffer("p75", 8, params_path, &mut function_context); + add_buffer("p76", 16, params_path, &mut function_context); + add_buffer("p77", 8, params_path, &mut function_context); + add_buffer("p78", 16, params_path, &mut function_context); + add_buffer("p79", 8, params_path, &mut function_context); + add_buffer("p80", 16, params_path, &mut function_context); + add_buffer("p81", 8, params_path, &mut function_context); + add_buffer("p82", 16, params_path, &mut function_context); + add_buffer("p83", 8, params_path, &mut function_context); + add_buffer("p84", 16, params_path, &mut function_context); + add_buffer("p85", 8, params_path, &mut function_context); + add_buffer("p86", 16, params_path, &mut function_context); + add_buffer("p87", 8, params_path, &mut function_context); + add_buffer("p88", 16, params_path, &mut function_context); + add_buffer("p89", 8, params_path, &mut function_context); + add_buffer("p90", 16, params_path, &mut function_context); + add_buffer("p91", 8, params_path, &mut function_context); + add_buffer("p92", 16, params_path, &mut function_context); + add_buffer("p93", 8, params_path, &mut function_context); + add_buffer("p94", 16, params_path, &mut function_context); + add_buffer("p95", 8, params_path, &mut function_context); + add_buffer("p96", 16, params_path, &mut 
function_context); + add_buffer("p97", 8, params_path, &mut function_context); + add_buffer("p98", 16, params_path, &mut function_context); + add_buffer("p99", 8, params_path, &mut function_context); + add_buffer("p100", 16, params_path, &mut function_context); + add_buffer("p101", 8, params_path, &mut function_context); + add_buffer("p102", 16, params_path, &mut function_context); + add_buffer("p103", 8, params_path, &mut function_context); + add_buffer("p104", 16, params_path, &mut function_context); + add_buffer("p105", 8, params_path, &mut function_context); + add_buffer("p106", 16, params_path, &mut function_context); + add_buffer("p107", 8, params_path, &mut function_context); + add_buffer("p108", 16, params_path, &mut function_context); + add_buffer("p109", 8, params_path, &mut function_context); + add_buffer("p110", 16, params_path, &mut function_context); + add_buffer("p111", 8, params_path, &mut function_context); + add_buffer("p112", 16, params_path, &mut function_context); + add_buffer("p113", 8, params_path, &mut function_context); + add_buffer("p114", 16, params_path, &mut function_context); + add_buffer("p115", 8, params_path, &mut function_context); + add_buffer("p116", 16, params_path, &mut function_context); + add_buffer("p117", 8, params_path, &mut function_context); + add_buffer("p118", 16, params_path, &mut function_context); + add_buffer("p119", 8, params_path, &mut function_context); + add_buffer("p120", 16, params_path, &mut function_context); + add_buffer("p121", 8, params_path, &mut function_context); + add_buffer("p122", 16, params_path, &mut function_context); + add_buffer("p123", 8, params_path, &mut function_context); + add_buffer("p124", 16, params_path, &mut function_context); + add_buffer("p125", 8, params_path, &mut function_context); + add_buffer("p126", 16, params_path, &mut function_context); + add_buffer("p127", 8, params_path, &mut function_context); + add_buffer("p128", 16, params_path, &mut function_context); + 
add_buffer("p129", 8, params_path, &mut function_context); + add_buffer("p130", 16, params_path, &mut function_context); + add_buffer("p131", 8, params_path, &mut function_context); + add_buffer("p132", 16, params_path, &mut function_context); + add_buffer("p133", 8, params_path, &mut function_context); + add_buffer("p134", 16, params_path, &mut function_context); + add_buffer("p135", 8, params_path, &mut function_context); + add_buffer("p136", 16, params_path, &mut function_context); + add_buffer("p137", 8, params_path, &mut function_context); + add_buffer("p138", 16, params_path, &mut function_context); + add_buffer("p139", 8, params_path, &mut function_context); + add_buffer("p140", 16, params_path, &mut function_context); + add_buffer("p141", 8, params_path, &mut function_context); + add_buffer("p142", 16, params_path, &mut function_context); + add_buffer("p143", 8, params_path, &mut function_context); + add_buffer("p144", 16, params_path, &mut function_context); + add_buffer("p145", 8, params_path, &mut function_context); + add_buffer("p146", 16, params_path, &mut function_context); + add_buffer("p147", 8, params_path, &mut function_context); + add_buffer("p148", 16, params_path, &mut function_context); + add_buffer("p149", 8, params_path, &mut function_context); + add_buffer("p150", 16, params_path, &mut function_context); + add_buffer("p151", 8, params_path, &mut function_context); + add_buffer("p152", 16, params_path, &mut function_context); + add_buffer("p153", 8, params_path, &mut function_context); + add_buffer("p154", 16, params_path, &mut function_context); + add_buffer("p155", 8, params_path, &mut function_context); + add_buffer("p156", 16, params_path, &mut function_context); + add_buffer("p157", 8, params_path, &mut function_context); + add_buffer("p158", 16, params_path, &mut function_context); + add_buffer("p159", 8, params_path, &mut function_context); + add_buffer("p160", 16, params_path, &mut function_context); + add_buffer("p161", 8, 
params_path, &mut function_context); + add_buffer("p162", 16, params_path, &mut function_context); + add_buffer("p163", 8, params_path, &mut function_context); + add_buffer("p164", 16, params_path, &mut function_context); + add_buffer("p165", 8, params_path, &mut function_context); + add_buffer("p166", 16, params_path, &mut function_context); + add_buffer("p167", 8, params_path, &mut function_context); + add_buffer("p168", 16, params_path, &mut function_context); + add_buffer("p169", 8, params_path, &mut function_context); + add_buffer("p170", 16, params_path, &mut function_context); + add_buffer("p171", 8, params_path, &mut function_context); + add_buffer("p172", 16, params_path, &mut function_context); + add_buffer("p173", 8, params_path, &mut function_context); + add_buffer("p174", 16, params_path, &mut function_context); + add_buffer("p175", 8, params_path, &mut function_context); + add_buffer("p176", 16, params_path, &mut function_context); + add_buffer("p177", 8, params_path, &mut function_context); + add_buffer("p178", 16, params_path, &mut function_context); + add_buffer("p179", 8, params_path, &mut function_context); + add_buffer("p180", 16, params_path, &mut function_context); + add_buffer("p181", 8, params_path, &mut function_context); + add_buffer("p182", 16, params_path, &mut function_context); + add_buffer("p183", 8, params_path, &mut function_context); + add_buffer("p184", 16, params_path, &mut function_context); + add_buffer("p185", 8, params_path, &mut function_context); + add_buffer("p186", 16, params_path, &mut function_context); + add_buffer("p187", 8, params_path, &mut function_context); + add_buffer("p188", 16, params_path, &mut function_context); + add_buffer("p189", 8, params_path, &mut function_context); + add_buffer("p190", 16, params_path, &mut function_context); + add_buffer("p191", 8, params_path, &mut function_context); + add_buffer("p192", 16, params_path, &mut function_context); + add_buffer("p193", 8, params_path, &mut 
function_context); + add_buffer("p194", 16, params_path, &mut function_context); + add_buffer("p195", 8, params_path, &mut function_context); + add_buffer("p196", 16, params_path, &mut function_context); + add_buffer("p197", 8, params_path, &mut function_context); + add_buffer("p198", 16, params_path, &mut function_context); + add_buffer("p199", 8, params_path, &mut function_context); + add_buffer("p200", 16, params_path, &mut function_context); + add_buffer("p201", 8, params_path, &mut function_context); + add_buffer("p202", 16, params_path, &mut function_context); + add_buffer("p203", 8, params_path, &mut function_context); + add_buffer("p204", 16, params_path, &mut function_context); + add_buffer("p205", 8, params_path, &mut function_context); + add_buffer("p206", 16, params_path, &mut function_context); + add_buffer("p207", 8, params_path, &mut function_context); + add_buffer("p208", 16, params_path, &mut function_context); + add_buffer("p209", 8, params_path, &mut function_context); + add_buffer("p210", 16, params_path, &mut function_context); + add_buffer("p211", 8, params_path, &mut function_context); + add_buffer("p212", 16, params_path, &mut function_context); + add_buffer("p213", 8, params_path, &mut function_context); + add_buffer("p214", 16, params_path, &mut function_context); + add_buffer("p215", 8, params_path, &mut function_context); + add_buffer("p216", 16, params_path, &mut function_context); + add_buffer("p217", 8, params_path, &mut function_context); + add_buffer("p218", 16, params_path, &mut function_context); + add_buffer("p219", 8, params_path, &mut function_context); + add_buffer("p220", 16, params_path, &mut function_context); + add_buffer("p221", 8, params_path, &mut function_context); + add_buffer("p222", 16, params_path, &mut function_context); + add_buffer("p223", 8, params_path, &mut function_context); + add_buffer("p224", 16, params_path, &mut function_context); + add_buffer("p225", 8, params_path, &mut function_context); + 
add_buffer("p226", 16, params_path, &mut function_context); + add_buffer("p227", 8, params_path, &mut function_context); + add_buffer("p228", 16, params_path, &mut function_context); + add_buffer("p229", 8, params_path, &mut function_context); + add_buffer("p230", 16, params_path, &mut function_context); + add_buffer("p231", 8, params_path, &mut function_context); + add_buffer("p232", 16, params_path, &mut function_context); + add_buffer("p233", 8, params_path, &mut function_context); + add_buffer("p234", 16, params_path, &mut function_context); + add_buffer("p235", 8, params_path, &mut function_context); + add_buffer("p236", 16, params_path, &mut function_context); + add_buffer("p237", 8, params_path, &mut function_context); + add_buffer("p238", 16, params_path, &mut function_context); + add_buffer("p239", 8, params_path, &mut function_context); + add_buffer("p240", 16, params_path, &mut function_context); + add_buffer("p241", 8, params_path, &mut function_context); + add_buffer("p242", 16, params_path, &mut function_context); + add_buffer("p243", 8, params_path, &mut function_context); + add_buffer("p244", 16, params_path, &mut function_context); + add_buffer("p245", 8, params_path, &mut function_context); + add_buffer("p246", 16, params_path, &mut function_context); + add_buffer("p247", 8, params_path, &mut function_context); + add_buffer("p248", 16, params_path, &mut function_context); + add_buffer("p249", 8, params_path, &mut function_context); + add_buffer("p250", 16, params_path, &mut function_context); + add_buffer("p251", 8, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 1024; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_llama(mut function_context: Context) -> (usize, String, Vec, Context) { + add_number("times", 87, &mut function_context); + + let params_path = 
"/pub/scratch/alrusso/TVM-compilation/model_parameters/llama/"; + + add_buffer("token_ids", 1024, params_path, &mut function_context); + add_buffer("p0", 1050673152, params_path, &mut function_context); + add_buffer("p1", 8192, params_path, &mut function_context); + add_buffer("p2", 16777216, params_path, &mut function_context); + add_buffer("p3", 32768, params_path, &mut function_context); + add_buffer("p4", 32768, params_path, &mut function_context); + add_buffer("p5", 4194304, params_path, &mut function_context); + add_buffer("p6", 32768, params_path, &mut function_context); + add_buffer("p7", 32768, params_path, &mut function_context); + add_buffer("p8", 16384, params_path, &mut function_context); + add_buffer("p9", 4194304, params_path, &mut function_context); + add_buffer("p10", 16777216, params_path, &mut function_context); + add_buffer("p11", 8192, params_path, &mut function_context); + add_buffer("p12", 67108864, params_path, &mut function_context); + add_buffer("p13", 67108864, params_path, &mut function_context); + add_buffer("p14", 67108864, params_path, &mut function_context); + add_buffer("p15", 8192, params_path, &mut function_context); + add_buffer("p16", 16777216, params_path, &mut function_context); + add_buffer("p17", 32768, params_path, &mut function_context); + add_buffer("p18", 32768, params_path, &mut function_context); + add_buffer("p19", 4194304, params_path, &mut function_context); + add_buffer("p20", 32768, params_path, &mut function_context); + add_buffer("p21", 32768, params_path, &mut function_context); + add_buffer("p22", 16384, params_path, &mut function_context); + add_buffer("p23", 4194304, params_path, &mut function_context); + add_buffer("p24", 16777216, params_path, &mut function_context); + add_buffer("p25", 8192, params_path, &mut function_context); + add_buffer("p26", 67108864, params_path, &mut function_context); + add_buffer("p27", 67108864, params_path, &mut function_context); + add_buffer("p28", 67108864, params_path, 
&mut function_context); + add_buffer("p29", 8192, params_path, &mut function_context); + add_buffer("p30", 16777216, params_path, &mut function_context); + add_buffer("p31", 32768, params_path, &mut function_context); + add_buffer("p32", 32768, params_path, &mut function_context); + add_buffer("p33", 4194304, params_path, &mut function_context); + add_buffer("p34", 32768, params_path, &mut function_context); + add_buffer("p35", 32768, params_path, &mut function_context); + add_buffer("p36", 16384, params_path, &mut function_context); + add_buffer("p37", 4194304, params_path, &mut function_context); + add_buffer("p38", 16777216, params_path, &mut function_context); + add_buffer("p39", 8192, params_path, &mut function_context); + add_buffer("p40", 67108864, params_path, &mut function_context); + add_buffer("p41", 67108864, params_path, &mut function_context); + add_buffer("p42", 67108864, params_path, &mut function_context); + add_buffer("p43", 8192, params_path, &mut function_context); + add_buffer("p44", 16777216, params_path, &mut function_context); + add_buffer("p45", 32768, params_path, &mut function_context); + add_buffer("p46", 32768, params_path, &mut function_context); + add_buffer("p47", 4194304, params_path, &mut function_context); + add_buffer("p48", 32768, params_path, &mut function_context); + add_buffer("p49", 32768, params_path, &mut function_context); + add_buffer("p50", 16384, params_path, &mut function_context); + add_buffer("p51", 4194304, params_path, &mut function_context); + add_buffer("p52", 16777216, params_path, &mut function_context); + add_buffer("p53", 8192, params_path, &mut function_context); + add_buffer("p54", 67108864, params_path, &mut function_context); + add_buffer("p55", 67108864, params_path, &mut function_context); + add_buffer("p56", 67108864, params_path, &mut function_context); + add_buffer("p57", 8192, params_path, &mut function_context); + add_buffer("p58", 16777216, params_path, &mut function_context); + add_buffer("p59", 
32768, params_path, &mut function_context); + add_buffer("p60", 32768, params_path, &mut function_context); + add_buffer("p61", 4194304, params_path, &mut function_context); + add_buffer("p62", 32768, params_path, &mut function_context); + add_buffer("p63", 32768, params_path, &mut function_context); + add_buffer("p64", 16384, params_path, &mut function_context); + add_buffer("p65", 4194304, params_path, &mut function_context); + add_buffer("p66", 16777216, params_path, &mut function_context); + add_buffer("p67", 8192, params_path, &mut function_context); + add_buffer("p68", 67108864, params_path, &mut function_context); + add_buffer("p69", 67108864, params_path, &mut function_context); + add_buffer("p70", 67108864, params_path, &mut function_context); + add_buffer("p71", 8192, params_path, &mut function_context); + add_buffer("p72", 16777216, params_path, &mut function_context); + add_buffer("p73", 32768, params_path, &mut function_context); + add_buffer("p74", 32768, params_path, &mut function_context); + add_buffer("p75", 4194304, params_path, &mut function_context); + add_buffer("p76", 32768, params_path, &mut function_context); + add_buffer("p77", 32768, params_path, &mut function_context); + add_buffer("p78", 16384, params_path, &mut function_context); + add_buffer("p79", 4194304, params_path, &mut function_context); + add_buffer("p80", 16777216, params_path, &mut function_context); + add_buffer("p81", 8192, params_path, &mut function_context); + add_buffer("p82", 67108864, params_path, &mut function_context); + add_buffer("p83", 67108864, params_path, &mut function_context); + add_buffer("p84", 67108864, params_path, &mut function_context); + add_buffer("p85", 8192, params_path, &mut function_context); + add_buffer("p86", 16777216, params_path, &mut function_context); + add_buffer("p87", 32768, params_path, &mut function_context); + add_buffer("p88", 32768, params_path, &mut function_context); + add_buffer("p89", 4194304, params_path, &mut function_context); 
+ add_buffer("p90", 32768, params_path, &mut function_context); + add_buffer("p91", 32768, params_path, &mut function_context); + add_buffer("p92", 16384, params_path, &mut function_context); + add_buffer("p93", 4194304, params_path, &mut function_context); + add_buffer("p94", 16777216, params_path, &mut function_context); + add_buffer("p95", 8192, params_path, &mut function_context); + add_buffer("p96", 67108864, params_path, &mut function_context); + add_buffer("p97", 67108864, params_path, &mut function_context); + add_buffer("p98", 67108864, params_path, &mut function_context); + add_buffer("p99", 8192, params_path, &mut function_context); + add_buffer("p100", 16777216, params_path, &mut function_context); + add_buffer("p101", 32768, params_path, &mut function_context); + add_buffer("p102", 32768, params_path, &mut function_context); + add_buffer("p103", 4194304, params_path, &mut function_context); + add_buffer("p104", 32768, params_path, &mut function_context); + add_buffer("p105", 32768, params_path, &mut function_context); + add_buffer("p106", 16384, params_path, &mut function_context); + add_buffer("p107", 4194304, params_path, &mut function_context); + add_buffer("p108", 16777216, params_path, &mut function_context); + add_buffer("p109", 8192, params_path, &mut function_context); + add_buffer("p110", 67108864, params_path, &mut function_context); + add_buffer("p111", 67108864, params_path, &mut function_context); + add_buffer("p112", 67108864, params_path, &mut function_context); + add_buffer("p113", 8192, params_path, &mut function_context); + add_buffer("p114", 16777216, params_path, &mut function_context); + add_buffer("p115", 32768, params_path, &mut function_context); + add_buffer("p116", 32768, params_path, &mut function_context); + add_buffer("p117", 4194304, params_path, &mut function_context); + add_buffer("p118", 32768, params_path, &mut function_context); + add_buffer("p119", 32768, params_path, &mut function_context); + add_buffer("p120", 
16384, params_path, &mut function_context); + add_buffer("p121", 4194304, params_path, &mut function_context); + add_buffer("p122", 16777216, params_path, &mut function_context); + add_buffer("p123", 8192, params_path, &mut function_context); + add_buffer("p124", 67108864, params_path, &mut function_context); + add_buffer("p125", 67108864, params_path, &mut function_context); + add_buffer("p126", 67108864, params_path, &mut function_context); + add_buffer("p127", 8192, params_path, &mut function_context); + add_buffer("p128", 16777216, params_path, &mut function_context); + add_buffer("p129", 32768, params_path, &mut function_context); + add_buffer("p130", 32768, params_path, &mut function_context); + add_buffer("p131", 4194304, params_path, &mut function_context); + add_buffer("p132", 32768, params_path, &mut function_context); + add_buffer("p133", 32768, params_path, &mut function_context); + add_buffer("p134", 16384, params_path, &mut function_context); + add_buffer("p135", 4194304, params_path, &mut function_context); + add_buffer("p136", 16777216, params_path, &mut function_context); + add_buffer("p137", 8192, params_path, &mut function_context); + add_buffer("p138", 67108864, params_path, &mut function_context); + add_buffer("p139", 67108864, params_path, &mut function_context); + add_buffer("p140", 67108864, params_path, &mut function_context); + add_buffer("p141", 8192, params_path, &mut function_context); + add_buffer("p142", 16777216, params_path, &mut function_context); + add_buffer("p143", 32768, params_path, &mut function_context); + add_buffer("p144", 32768, params_path, &mut function_context); + add_buffer("p145", 4194304, params_path, &mut function_context); + add_buffer("p146", 32768, params_path, &mut function_context); + add_buffer("p147", 32768, params_path, &mut function_context); + add_buffer("p148", 16384, params_path, &mut function_context); + add_buffer("p149", 4194304, params_path, &mut function_context); + add_buffer("p150", 16777216, 
params_path, &mut function_context); + add_buffer("p151", 8192, params_path, &mut function_context); + add_buffer("p152", 67108864, params_path, &mut function_context); + add_buffer("p153", 67108864, params_path, &mut function_context); + add_buffer("p154", 67108864, params_path, &mut function_context); + add_buffer("p155", 8192, params_path, &mut function_context); + add_buffer("p156", 16777216, params_path, &mut function_context); + add_buffer("p157", 32768, params_path, &mut function_context); + add_buffer("p158", 32768, params_path, &mut function_context); + add_buffer("p159", 4194304, params_path, &mut function_context); + add_buffer("p160", 32768, params_path, &mut function_context); + add_buffer("p161", 32768, params_path, &mut function_context); + add_buffer("p162", 16384, params_path, &mut function_context); + add_buffer("p163", 4194304, params_path, &mut function_context); + add_buffer("p164", 16777216, params_path, &mut function_context); + add_buffer("p165", 8192, params_path, &mut function_context); + add_buffer("p166", 67108864, params_path, &mut function_context); + add_buffer("p167", 67108864, params_path, &mut function_context); + add_buffer("p168", 67108864, params_path, &mut function_context); + add_buffer("p169", 8192, params_path, &mut function_context); + add_buffer("p170", 16777216, params_path, &mut function_context); + add_buffer("p171", 32768, params_path, &mut function_context); + add_buffer("p172", 32768, params_path, &mut function_context); + add_buffer("p173", 4194304, params_path, &mut function_context); + add_buffer("p174", 32768, params_path, &mut function_context); + add_buffer("p175", 32768, params_path, &mut function_context); + add_buffer("p176", 16384, params_path, &mut function_context); + add_buffer("p177", 4194304, params_path, &mut function_context); + add_buffer("p178", 16777216, params_path, &mut function_context); + add_buffer("p179", 8192, params_path, &mut function_context); + add_buffer("p180", 67108864, params_path, 
&mut function_context); + add_buffer("p181", 67108864, params_path, &mut function_context); + add_buffer("p182", 67108864, params_path, &mut function_context); + add_buffer("p183", 8192, params_path, &mut function_context); + add_buffer("p184", 16777216, params_path, &mut function_context); + add_buffer("p185", 32768, params_path, &mut function_context); + add_buffer("p186", 32768, params_path, &mut function_context); + add_buffer("p187", 4194304, params_path, &mut function_context); + add_buffer("p188", 32768, params_path, &mut function_context); + add_buffer("p189", 32768, params_path, &mut function_context); + add_buffer("p190", 16384, params_path, &mut function_context); + add_buffer("p191", 4194304, params_path, &mut function_context); + add_buffer("p192", 16777216, params_path, &mut function_context); + add_buffer("p193", 8192, params_path, &mut function_context); + add_buffer("p194", 67108864, params_path, &mut function_context); + add_buffer("p195", 67108864, params_path, &mut function_context); + add_buffer("p196", 67108864, params_path, &mut function_context); + add_buffer("p197", 8192, params_path, &mut function_context); + add_buffer("p198", 16777216, params_path, &mut function_context); + add_buffer("p199", 32768, params_path, &mut function_context); + add_buffer("p200", 32768, params_path, &mut function_context); + add_buffer("p201", 4194304, params_path, &mut function_context); + add_buffer("p202", 32768, params_path, &mut function_context); + add_buffer("p203", 32768, params_path, &mut function_context); + add_buffer("p204", 16384, params_path, &mut function_context); + add_buffer("p205", 4194304, params_path, &mut function_context); + add_buffer("p206", 16777216, params_path, &mut function_context); + add_buffer("p207", 8192, params_path, &mut function_context); + add_buffer("p208", 67108864, params_path, &mut function_context); + add_buffer("p209", 67108864, params_path, &mut function_context); + add_buffer("p210", 67108864, params_path, &mut 
function_context); + add_buffer("p211", 8192, params_path, &mut function_context); + add_buffer("p212", 16777216, params_path, &mut function_context); + add_buffer("p213", 32768, params_path, &mut function_context); + add_buffer("p214", 32768, params_path, &mut function_context); + add_buffer("p215", 4194304, params_path, &mut function_context); + add_buffer("p216", 32768, params_path, &mut function_context); + add_buffer("p217", 32768, params_path, &mut function_context); + add_buffer("p218", 16384, params_path, &mut function_context); + add_buffer("p219", 4194304, params_path, &mut function_context); + add_buffer("p220", 16777216, params_path, &mut function_context); + add_buffer("p221", 8192, params_path, &mut function_context); + add_buffer("p222", 67108864, params_path, &mut function_context); + add_buffer("p223", 67108864, params_path, &mut function_context); + add_buffer("p224", 67108864, params_path, &mut function_context); + add_buffer("p225", 8192, params_path, &mut function_context); + add_buffer("p226", 1050673152, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 65667072; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn full_load_llama_kv(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/llama/"; + + add_buffer("token_ids", 1024, params_path, &mut function_context); + + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/llama_kv/"; + + add_buffer("keys", 16777216, params_path, &mut function_context); + add_buffer("values", 16777216, params_path, &mut function_context); + add_buffer("cos", 65536, params_path, &mut function_context); + add_buffer("sin", 65536, params_path, &mut function_context); + add_buffer("p0", 1024, params_path, &mut function_context); + 
add_buffer("p1", 1024, params_path, &mut function_context); + add_buffer("p2", 1024, params_path, &mut function_context); + add_buffer("p3", 1024, params_path, &mut function_context); + add_buffer("p4", 1024, params_path, &mut function_context); + add_buffer("p5", 1024, params_path, &mut function_context); + add_buffer("p6", 1024, params_path, &mut function_context); + add_buffer("p7", 1024, params_path, &mut function_context); + add_buffer("p8", 1024, params_path, &mut function_context); + add_buffer("p9", 1024, params_path, &mut function_context); + add_buffer("p10", 1024, params_path, &mut function_context); + add_buffer("p11", 1024, params_path, &mut function_context); + add_buffer("p12", 1024, params_path, &mut function_context); + add_buffer("p13", 1024, params_path, &mut function_context); + add_buffer("p14", 1024, params_path, &mut function_context); + add_buffer("p15", 1024, params_path, &mut function_context); + add_buffer("p16", 1050673152, params_path, &mut function_context); + add_buffer("p17", 8192, params_path, &mut function_context); + add_buffer("p18", 16777216, params_path, &mut function_context); + add_buffer("p19", 16384, params_path, &mut function_context); + add_buffer("p20", 16384, params_path, &mut function_context); + add_buffer("p21", 16384, params_path, &mut function_context); + add_buffer("p22", 16384, params_path, &mut function_context); + add_buffer("p23", 4194304, params_path, &mut function_context); + add_buffer("p24", 8, params_path, &mut function_context); + add_buffer("p25", 16384, params_path, &mut function_context); + add_buffer("p26", 16384, params_path, &mut function_context); + add_buffer("p27", 16384, params_path, &mut function_context); + add_buffer("p28", 16384, params_path, &mut function_context); + add_buffer("p29", 16384, params_path, &mut function_context); + add_buffer("p30", 4194304, params_path, &mut function_context); + add_buffer("p31", 16777216, params_path, &mut function_context); + add_buffer("p32", 8192, 
params_path, &mut function_context); + add_buffer("p33", 67108864, params_path, &mut function_context); + add_buffer("p34", 67108864, params_path, &mut function_context); + add_buffer("p35", 67108864, params_path, &mut function_context); + add_buffer("p36", 8192, params_path, &mut function_context); + add_buffer("p37", 16777216, params_path, &mut function_context); + add_buffer("p38", 16384, params_path, &mut function_context); + add_buffer("p39", 16384, params_path, &mut function_context); + add_buffer("p40", 16384, params_path, &mut function_context); + add_buffer("p41", 16384, params_path, &mut function_context); + add_buffer("p42", 4194304, params_path, &mut function_context); + add_buffer("p43", 8, params_path, &mut function_context); + add_buffer("p44", 16384, params_path, &mut function_context); + add_buffer("p45", 16384, params_path, &mut function_context); + add_buffer("p46", 16384, params_path, &mut function_context); + add_buffer("p47", 16384, params_path, &mut function_context); + add_buffer("p48", 16384, params_path, &mut function_context); + add_buffer("p49", 4194304, params_path, &mut function_context); + add_buffer("p50", 16777216, params_path, &mut function_context); + add_buffer("p51", 8192, params_path, &mut function_context); + add_buffer("p52", 67108864, params_path, &mut function_context); + add_buffer("p53", 67108864, params_path, &mut function_context); + add_buffer("p54", 67108864, params_path, &mut function_context); + add_buffer("p55", 8192, params_path, &mut function_context); + add_buffer("p56", 16777216, params_path, &mut function_context); + add_buffer("p57", 16384, params_path, &mut function_context); + add_buffer("p58", 16384, params_path, &mut function_context); + add_buffer("p59", 16384, params_path, &mut function_context); + add_buffer("p60", 16384, params_path, &mut function_context); + add_buffer("p61", 4194304, params_path, &mut function_context); + add_buffer("p62", 8, params_path, &mut function_context); + add_buffer("p63", 
16384, params_path, &mut function_context); + add_buffer("p64", 16384, params_path, &mut function_context); + add_buffer("p65", 16384, params_path, &mut function_context); + add_buffer("p66", 16384, params_path, &mut function_context); + add_buffer("p67", 16384, params_path, &mut function_context); + add_buffer("p68", 4194304, params_path, &mut function_context); + add_buffer("p69", 16777216, params_path, &mut function_context); + add_buffer("p70", 8192, params_path, &mut function_context); + add_buffer("p71", 67108864, params_path, &mut function_context); + add_buffer("p72", 67108864, params_path, &mut function_context); + add_buffer("p73", 67108864, params_path, &mut function_context); + add_buffer("p74", 8192, params_path, &mut function_context); + add_buffer("p75", 16777216, params_path, &mut function_context); + add_buffer("p76", 16384, params_path, &mut function_context); + add_buffer("p77", 16384, params_path, &mut function_context); + add_buffer("p78", 16384, params_path, &mut function_context); + add_buffer("p79", 16384, params_path, &mut function_context); + add_buffer("p80", 4194304, params_path, &mut function_context); + add_buffer("p81", 8, params_path, &mut function_context); + add_buffer("p82", 16384, params_path, &mut function_context); + add_buffer("p83", 16384, params_path, &mut function_context); + add_buffer("p84", 16384, params_path, &mut function_context); + add_buffer("p85", 16384, params_path, &mut function_context); + add_buffer("p86", 16384, params_path, &mut function_context); + add_buffer("p87", 4194304, params_path, &mut function_context); + add_buffer("p88", 16777216, params_path, &mut function_context); + add_buffer("p89", 8192, params_path, &mut function_context); + add_buffer("p90", 67108864, params_path, &mut function_context); + add_buffer("p91", 67108864, params_path, &mut function_context); + add_buffer("p92", 67108864, params_path, &mut function_context); + add_buffer("p93", 8192, params_path, &mut function_context); + 
add_buffer("p94", 16777216, params_path, &mut function_context); + add_buffer("p95", 16384, params_path, &mut function_context); + add_buffer("p96", 16384, params_path, &mut function_context); + add_buffer("p97", 16384, params_path, &mut function_context); + add_buffer("p98", 16384, params_path, &mut function_context); + add_buffer("p99", 4194304, params_path, &mut function_context); + add_buffer("p100", 8, params_path, &mut function_context); + add_buffer("p101", 16384, params_path, &mut function_context); + add_buffer("p102", 16384, params_path, &mut function_context); + add_buffer("p103", 16384, params_path, &mut function_context); + add_buffer("p104", 16384, params_path, &mut function_context); + add_buffer("p105", 16384, params_path, &mut function_context); + add_buffer("p106", 4194304, params_path, &mut function_context); + add_buffer("p107", 16777216, params_path, &mut function_context); + add_buffer("p108", 8192, params_path, &mut function_context); + add_buffer("p109", 67108864, params_path, &mut function_context); + add_buffer("p110", 67108864, params_path, &mut function_context); + add_buffer("p111", 67108864, params_path, &mut function_context); + add_buffer("p112", 8192, params_path, &mut function_context); + add_buffer("p113", 16777216, params_path, &mut function_context); + add_buffer("p114", 16384, params_path, &mut function_context); + add_buffer("p115", 16384, params_path, &mut function_context); + add_buffer("p116", 16384, params_path, &mut function_context); + add_buffer("p117", 16384, params_path, &mut function_context); + add_buffer("p118", 4194304, params_path, &mut function_context); + add_buffer("p119", 8, params_path, &mut function_context); + add_buffer("p120", 16384, params_path, &mut function_context); + add_buffer("p121", 16384, params_path, &mut function_context); + add_buffer("p122", 16384, params_path, &mut function_context); + add_buffer("p123", 16384, params_path, &mut function_context); + add_buffer("p124", 16384, params_path, 
&mut function_context); + add_buffer("p125", 4194304, params_path, &mut function_context); + add_buffer("p126", 16777216, params_path, &mut function_context); + add_buffer("p127", 8192, params_path, &mut function_context); + add_buffer("p128", 67108864, params_path, &mut function_context); + add_buffer("p129", 67108864, params_path, &mut function_context); + add_buffer("p130", 67108864, params_path, &mut function_context); + add_buffer("p131", 8192, params_path, &mut function_context); + add_buffer("p132", 16777216, params_path, &mut function_context); + add_buffer("p133", 16384, params_path, &mut function_context); + add_buffer("p134", 16384, params_path, &mut function_context); + add_buffer("p135", 16384, params_path, &mut function_context); + add_buffer("p136", 16384, params_path, &mut function_context); + add_buffer("p137", 4194304, params_path, &mut function_context); + add_buffer("p138", 8, params_path, &mut function_context); + add_buffer("p139", 16384, params_path, &mut function_context); + add_buffer("p140", 16384, params_path, &mut function_context); + add_buffer("p141", 16384, params_path, &mut function_context); + add_buffer("p142", 16384, params_path, &mut function_context); + add_buffer("p143", 16384, params_path, &mut function_context); + add_buffer("p144", 4194304, params_path, &mut function_context); + add_buffer("p145", 16777216, params_path, &mut function_context); + add_buffer("p146", 8192, params_path, &mut function_context); + add_buffer("p147", 67108864, params_path, &mut function_context); + add_buffer("p148", 67108864, params_path, &mut function_context); + add_buffer("p149", 67108864, params_path, &mut function_context); + add_buffer("p150", 8192, params_path, &mut function_context); + add_buffer("p151", 16777216, params_path, &mut function_context); + add_buffer("p152", 16384, params_path, &mut function_context); + add_buffer("p153", 16384, params_path, &mut function_context); + add_buffer("p154", 16384, params_path, &mut 
function_context); + add_buffer("p155", 16384, params_path, &mut function_context); + add_buffer("p156", 4194304, params_path, &mut function_context); + add_buffer("p157", 8, params_path, &mut function_context); + add_buffer("p158", 16384, params_path, &mut function_context); + add_buffer("p159", 16384, params_path, &mut function_context); + add_buffer("p160", 16384, params_path, &mut function_context); + add_buffer("p161", 16384, params_path, &mut function_context); + add_buffer("p162", 16384, params_path, &mut function_context); + add_buffer("p163", 4194304, params_path, &mut function_context); + add_buffer("p164", 16777216, params_path, &mut function_context); + add_buffer("p165", 8192, params_path, &mut function_context); + add_buffer("p166", 67108864, params_path, &mut function_context); + add_buffer("p167", 67108864, params_path, &mut function_context); + add_buffer("p168", 67108864, params_path, &mut function_context); + add_buffer("p169", 8192, params_path, &mut function_context); + add_buffer("p170", 16777216, params_path, &mut function_context); + add_buffer("p171", 16384, params_path, &mut function_context); + add_buffer("p172", 16384, params_path, &mut function_context); + add_buffer("p173", 16384, params_path, &mut function_context); + add_buffer("p174", 16384, params_path, &mut function_context); + add_buffer("p175", 4194304, params_path, &mut function_context); + add_buffer("p176", 8, params_path, &mut function_context); + add_buffer("p177", 16384, params_path, &mut function_context); + add_buffer("p178", 16384, params_path, &mut function_context); + add_buffer("p179", 16384, params_path, &mut function_context); + add_buffer("p180", 16384, params_path, &mut function_context); + add_buffer("p181", 16384, params_path, &mut function_context); + add_buffer("p182", 4194304, params_path, &mut function_context); + add_buffer("p183", 16777216, params_path, &mut function_context); + add_buffer("p184", 8192, params_path, &mut function_context); + 
add_buffer("p185", 67108864, params_path, &mut function_context); + add_buffer("p186", 67108864, params_path, &mut function_context); + add_buffer("p187", 67108864, params_path, &mut function_context); + add_buffer("p188", 8192, params_path, &mut function_context); + add_buffer("p189", 16777216, params_path, &mut function_context); + add_buffer("p190", 16384, params_path, &mut function_context); + add_buffer("p191", 16384, params_path, &mut function_context); + add_buffer("p192", 16384, params_path, &mut function_context); + add_buffer("p193", 16384, params_path, &mut function_context); + add_buffer("p194", 4194304, params_path, &mut function_context); + add_buffer("p195", 8, params_path, &mut function_context); + add_buffer("p196", 16384, params_path, &mut function_context); + add_buffer("p197", 16384, params_path, &mut function_context); + add_buffer("p198", 16384, params_path, &mut function_context); + add_buffer("p199", 16384, params_path, &mut function_context); + add_buffer("p200", 16384, params_path, &mut function_context); + add_buffer("p201", 4194304, params_path, &mut function_context); + add_buffer("p202", 16777216, params_path, &mut function_context); + add_buffer("p203", 8192, params_path, &mut function_context); + add_buffer("p204", 67108864, params_path, &mut function_context); + add_buffer("p205", 67108864, params_path, &mut function_context); + add_buffer("p206", 67108864, params_path, &mut function_context); + add_buffer("p207", 8192, params_path, &mut function_context); + add_buffer("p208", 16777216, params_path, &mut function_context); + add_buffer("p209", 16384, params_path, &mut function_context); + add_buffer("p210", 16384, params_path, &mut function_context); + add_buffer("p211", 16384, params_path, &mut function_context); + add_buffer("p212", 16384, params_path, &mut function_context); + add_buffer("p213", 4194304, params_path, &mut function_context); + add_buffer("p214", 8, params_path, &mut function_context); + add_buffer("p215", 16384, 
params_path, &mut function_context); + add_buffer("p216", 16384, params_path, &mut function_context); + add_buffer("p217", 16384, params_path, &mut function_context); + add_buffer("p218", 16384, params_path, &mut function_context); + add_buffer("p219", 16384, params_path, &mut function_context); + add_buffer("p220", 4194304, params_path, &mut function_context); + add_buffer("p221", 16777216, params_path, &mut function_context); + add_buffer("p222", 8192, params_path, &mut function_context); + add_buffer("p223", 67108864, params_path, &mut function_context); + add_buffer("p224", 67108864, params_path, &mut function_context); + add_buffer("p225", 67108864, params_path, &mut function_context); + add_buffer("p226", 8192, params_path, &mut function_context); + add_buffer("p227", 16777216, params_path, &mut function_context); + add_buffer("p228", 16384, params_path, &mut function_context); + add_buffer("p229", 16384, params_path, &mut function_context); + add_buffer("p230", 16384, params_path, &mut function_context); + add_buffer("p231", 16384, params_path, &mut function_context); + add_buffer("p232", 4194304, params_path, &mut function_context); + add_buffer("p233", 8, params_path, &mut function_context); + add_buffer("p234", 16384, params_path, &mut function_context); + add_buffer("p235", 16384, params_path, &mut function_context); + add_buffer("p236", 16384, params_path, &mut function_context); + add_buffer("p237", 16384, params_path, &mut function_context); + add_buffer("p238", 16384, params_path, &mut function_context); + add_buffer("p239", 4194304, params_path, &mut function_context); + add_buffer("p240", 16777216, params_path, &mut function_context); + add_buffer("p241", 8192, params_path, &mut function_context); + add_buffer("p242", 67108864, params_path, &mut function_context); + add_buffer("p243", 67108864, params_path, &mut function_context); + add_buffer("p244", 67108864, params_path, &mut function_context); + add_buffer("p245", 8192, params_path, &mut 
function_context); + add_buffer("p246", 16777216, params_path, &mut function_context); + add_buffer("p247", 16384, params_path, &mut function_context); + add_buffer("p248", 16384, params_path, &mut function_context); + add_buffer("p249", 16384, params_path, &mut function_context); + add_buffer("p250", 16384, params_path, &mut function_context); + add_buffer("p251", 4194304, params_path, &mut function_context); + add_buffer("p252", 8, params_path, &mut function_context); + add_buffer("p253", 16384, params_path, &mut function_context); + add_buffer("p254", 16384, params_path, &mut function_context); + add_buffer("p255", 16384, params_path, &mut function_context); + add_buffer("p256", 16384, params_path, &mut function_context); + add_buffer("p257", 16384, params_path, &mut function_context); + add_buffer("p258", 4194304, params_path, &mut function_context); + add_buffer("p259", 16777216, params_path, &mut function_context); + add_buffer("p260", 8192, params_path, &mut function_context); + add_buffer("p261", 67108864, params_path, &mut function_context); + add_buffer("p262", 67108864, params_path, &mut function_context); + add_buffer("p263", 67108864, params_path, &mut function_context); + add_buffer("p264", 8192, params_path, &mut function_context); + add_buffer("p265", 16777216, params_path, &mut function_context); + add_buffer("p266", 16384, params_path, &mut function_context); + add_buffer("p267", 16384, params_path, &mut function_context); + add_buffer("p268", 16384, params_path, &mut function_context); + add_buffer("p269", 16384, params_path, &mut function_context); + add_buffer("p270", 4194304, params_path, &mut function_context); + add_buffer("p271", 8, params_path, &mut function_context); + add_buffer("p272", 16384, params_path, &mut function_context); + add_buffer("p273", 16384, params_path, &mut function_context); + add_buffer("p274", 16384, params_path, &mut function_context); + add_buffer("p275", 16384, params_path, &mut function_context); + 
add_buffer("p276", 16384, params_path, &mut function_context); + add_buffer("p277", 4194304, params_path, &mut function_context); + add_buffer("p278", 16777216, params_path, &mut function_context); + add_buffer("p279", 8192, params_path, &mut function_context); + add_buffer("p280", 67108864, params_path, &mut function_context); + add_buffer("p281", 67108864, params_path, &mut function_context); + add_buffer("p282", 67108864, params_path, &mut function_context); + add_buffer("p283", 8192, params_path, &mut function_context); + add_buffer("p284", 16777216, params_path, &mut function_context); + add_buffer("p285", 16384, params_path, &mut function_context); + add_buffer("p286", 16384, params_path, &mut function_context); + add_buffer("p287", 16384, params_path, &mut function_context); + add_buffer("p288", 16384, params_path, &mut function_context); + add_buffer("p289", 4194304, params_path, &mut function_context); + add_buffer("p290", 8, params_path, &mut function_context); + add_buffer("p291", 16384, params_path, &mut function_context); + add_buffer("p292", 16384, params_path, &mut function_context); + add_buffer("p293", 16384, params_path, &mut function_context); + add_buffer("p294", 16384, params_path, &mut function_context); + add_buffer("p295", 16384, params_path, &mut function_context); + add_buffer("p296", 4194304, params_path, &mut function_context); + add_buffer("p297", 16777216, params_path, &mut function_context); + add_buffer("p298", 8192, params_path, &mut function_context); + add_buffer("p299", 67108864, params_path, &mut function_context); + add_buffer("p300", 67108864, params_path, &mut function_context); + add_buffer("p301", 67108864, params_path, &mut function_context); + add_buffer("p302", 8192, params_path, &mut function_context); + add_buffer("p303", 16777216, params_path, &mut function_context); + add_buffer("p304", 16384, params_path, &mut function_context); + add_buffer("p305", 16384, params_path, &mut function_context); + add_buffer("p306", 
16384, params_path, &mut function_context); + add_buffer("p307", 16384, params_path, &mut function_context); + add_buffer("p308", 4194304, params_path, &mut function_context); + add_buffer("p309", 8, params_path, &mut function_context); + add_buffer("p310", 16384, params_path, &mut function_context); + add_buffer("p311", 16384, params_path, &mut function_context); + add_buffer("p312", 16384, params_path, &mut function_context); + add_buffer("p313", 16384, params_path, &mut function_context); + add_buffer("p314", 16384, params_path, &mut function_context); + add_buffer("p315", 4194304, params_path, &mut function_context); + add_buffer("p316", 16777216, params_path, &mut function_context); + add_buffer("p317", 8192, params_path, &mut function_context); + add_buffer("p318", 67108864, params_path, &mut function_context); + add_buffer("p319", 67108864, params_path, &mut function_context); + add_buffer("p320", 67108864, params_path, &mut function_context); + add_buffer("p321", 8192, params_path, &mut function_context); + add_buffer("p322", 1050673152, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 1048576; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_llama_kv(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/llama_kv/"; + + add_buffer("token_id", 8, params_path, &mut function_context); + add_buffer("index", 8, params_path, &mut function_context); + add_buffer("keys", 16777216, params_path, &mut function_context); + add_buffer("values", 16777216, params_path, &mut function_context); + add_buffer("cos", 65536, params_path, &mut function_context); + add_buffer("sin", 65536, params_path, &mut function_context); + add_buffer("p0", 1024, params_path, &mut function_context); + add_buffer("p1", 1024, 
params_path, &mut function_context); + add_buffer("p2", 1024, params_path, &mut function_context); + add_buffer("p3", 1024, params_path, &mut function_context); + add_buffer("p4", 1024, params_path, &mut function_context); + add_buffer("p5", 1024, params_path, &mut function_context); + add_buffer("p6", 1024, params_path, &mut function_context); + add_buffer("p7", 1024, params_path, &mut function_context); + add_buffer("p8", 1024, params_path, &mut function_context); + add_buffer("p9", 1024, params_path, &mut function_context); + add_buffer("p10", 1024, params_path, &mut function_context); + add_buffer("p11", 1024, params_path, &mut function_context); + add_buffer("p12", 1024, params_path, &mut function_context); + add_buffer("p13", 1024, params_path, &mut function_context); + add_buffer("p14", 1024, params_path, &mut function_context); + add_buffer("p15", 1024, params_path, &mut function_context); + add_buffer("p16", 1050673152, params_path, &mut function_context); + add_buffer("p17", 8192, params_path, &mut function_context); + add_buffer("p18", 16777216, params_path, &mut function_context); + add_buffer("p19", 16384, params_path, &mut function_context); + add_buffer("p20", 16384, params_path, &mut function_context); + add_buffer("p21", 16384, params_path, &mut function_context); + add_buffer("p22", 16384, params_path, &mut function_context); + add_buffer("p23", 4194304, params_path, &mut function_context); + add_buffer("p24", 8, params_path, &mut function_context); + add_buffer("p25", 16384, params_path, &mut function_context); + add_buffer("p26", 16384, params_path, &mut function_context); + add_buffer("p27", 16384, params_path, &mut function_context); + add_buffer("p28", 16384, params_path, &mut function_context); + add_buffer("p29", 16384, params_path, &mut function_context); + add_buffer("p30", 4194304, params_path, &mut function_context); + add_buffer("p31", 16777216, params_path, &mut function_context); + add_buffer("p32", 8192, params_path, &mut 
function_context); + add_buffer("p33", 67108864, params_path, &mut function_context); + add_buffer("p34", 67108864, params_path, &mut function_context); + add_buffer("p35", 67108864, params_path, &mut function_context); + add_buffer("p36", 8192, params_path, &mut function_context); + add_buffer("p37", 16777216, params_path, &mut function_context); + add_buffer("p38", 16384, params_path, &mut function_context); + add_buffer("p39", 16384, params_path, &mut function_context); + add_buffer("p40", 16384, params_path, &mut function_context); + add_buffer("p41", 16384, params_path, &mut function_context); + add_buffer("p42", 4194304, params_path, &mut function_context); + add_buffer("p43", 8, params_path, &mut function_context); + add_buffer("p44", 16384, params_path, &mut function_context); + add_buffer("p45", 16384, params_path, &mut function_context); + add_buffer("p46", 16384, params_path, &mut function_context); + add_buffer("p47", 16384, params_path, &mut function_context); + add_buffer("p48", 16384, params_path, &mut function_context); + add_buffer("p49", 4194304, params_path, &mut function_context); + add_buffer("p50", 16777216, params_path, &mut function_context); + add_buffer("p51", 8192, params_path, &mut function_context); + add_buffer("p52", 67108864, params_path, &mut function_context); + add_buffer("p53", 67108864, params_path, &mut function_context); + add_buffer("p54", 67108864, params_path, &mut function_context); + add_buffer("p55", 8192, params_path, &mut function_context); + add_buffer("p56", 16777216, params_path, &mut function_context); + add_buffer("p57", 16384, params_path, &mut function_context); + add_buffer("p58", 16384, params_path, &mut function_context); + add_buffer("p59", 16384, params_path, &mut function_context); + add_buffer("p60", 16384, params_path, &mut function_context); + add_buffer("p61", 4194304, params_path, &mut function_context); + add_buffer("p62", 8, params_path, &mut function_context); + add_buffer("p63", 16384, 
params_path, &mut function_context); + add_buffer("p64", 16384, params_path, &mut function_context); + add_buffer("p65", 16384, params_path, &mut function_context); + add_buffer("p66", 16384, params_path, &mut function_context); + add_buffer("p67", 16384, params_path, &mut function_context); + add_buffer("p68", 4194304, params_path, &mut function_context); + add_buffer("p69", 16777216, params_path, &mut function_context); + add_buffer("p70", 8192, params_path, &mut function_context); + add_buffer("p71", 67108864, params_path, &mut function_context); + add_buffer("p72", 67108864, params_path, &mut function_context); + add_buffer("p73", 67108864, params_path, &mut function_context); + add_buffer("p74", 8192, params_path, &mut function_context); + add_buffer("p75", 16777216, params_path, &mut function_context); + add_buffer("p76", 16384, params_path, &mut function_context); + add_buffer("p77", 16384, params_path, &mut function_context); + add_buffer("p78", 16384, params_path, &mut function_context); + add_buffer("p79", 16384, params_path, &mut function_context); + add_buffer("p80", 4194304, params_path, &mut function_context); + add_buffer("p81", 8, params_path, &mut function_context); + add_buffer("p82", 16384, params_path, &mut function_context); + add_buffer("p83", 16384, params_path, &mut function_context); + add_buffer("p84", 16384, params_path, &mut function_context); + add_buffer("p85", 16384, params_path, &mut function_context); + add_buffer("p86", 16384, params_path, &mut function_context); + add_buffer("p87", 4194304, params_path, &mut function_context); + add_buffer("p88", 16777216, params_path, &mut function_context); + add_buffer("p89", 8192, params_path, &mut function_context); + add_buffer("p90", 67108864, params_path, &mut function_context); + add_buffer("p91", 67108864, params_path, &mut function_context); + add_buffer("p92", 67108864, params_path, &mut function_context); + add_buffer("p93", 8192, params_path, &mut function_context); + 
add_buffer("p94", 16777216, params_path, &mut function_context); + add_buffer("p95", 16384, params_path, &mut function_context); + add_buffer("p96", 16384, params_path, &mut function_context); + add_buffer("p97", 16384, params_path, &mut function_context); + add_buffer("p98", 16384, params_path, &mut function_context); + add_buffer("p99", 4194304, params_path, &mut function_context); + add_buffer("p100", 8, params_path, &mut function_context); + add_buffer("p101", 16384, params_path, &mut function_context); + add_buffer("p102", 16384, params_path, &mut function_context); + add_buffer("p103", 16384, params_path, &mut function_context); + add_buffer("p104", 16384, params_path, &mut function_context); + add_buffer("p105", 16384, params_path, &mut function_context); + add_buffer("p106", 4194304, params_path, &mut function_context); + add_buffer("p107", 16777216, params_path, &mut function_context); + add_buffer("p108", 8192, params_path, &mut function_context); + add_buffer("p109", 67108864, params_path, &mut function_context); + add_buffer("p110", 67108864, params_path, &mut function_context); + add_buffer("p111", 67108864, params_path, &mut function_context); + add_buffer("p112", 8192, params_path, &mut function_context); + add_buffer("p113", 16777216, params_path, &mut function_context); + add_buffer("p114", 16384, params_path, &mut function_context); + add_buffer("p115", 16384, params_path, &mut function_context); + add_buffer("p116", 16384, params_path, &mut function_context); + add_buffer("p117", 16384, params_path, &mut function_context); + add_buffer("p118", 4194304, params_path, &mut function_context); + add_buffer("p119", 8, params_path, &mut function_context); + add_buffer("p120", 16384, params_path, &mut function_context); + add_buffer("p121", 16384, params_path, &mut function_context); + add_buffer("p122", 16384, params_path, &mut function_context); + add_buffer("p123", 16384, params_path, &mut function_context); + add_buffer("p124", 16384, params_path, 
&mut function_context); + add_buffer("p125", 4194304, params_path, &mut function_context); + add_buffer("p126", 16777216, params_path, &mut function_context); + add_buffer("p127", 8192, params_path, &mut function_context); + add_buffer("p128", 67108864, params_path, &mut function_context); + add_buffer("p129", 67108864, params_path, &mut function_context); + add_buffer("p130", 67108864, params_path, &mut function_context); + add_buffer("p131", 8192, params_path, &mut function_context); + add_buffer("p132", 16777216, params_path, &mut function_context); + add_buffer("p133", 16384, params_path, &mut function_context); + add_buffer("p134", 16384, params_path, &mut function_context); + add_buffer("p135", 16384, params_path, &mut function_context); + add_buffer("p136", 16384, params_path, &mut function_context); + add_buffer("p137", 4194304, params_path, &mut function_context); + add_buffer("p138", 8, params_path, &mut function_context); + add_buffer("p139", 16384, params_path, &mut function_context); + add_buffer("p140", 16384, params_path, &mut function_context); + add_buffer("p141", 16384, params_path, &mut function_context); + add_buffer("p142", 16384, params_path, &mut function_context); + add_buffer("p143", 16384, params_path, &mut function_context); + add_buffer("p144", 4194304, params_path, &mut function_context); + add_buffer("p145", 16777216, params_path, &mut function_context); + add_buffer("p146", 8192, params_path, &mut function_context); + add_buffer("p147", 67108864, params_path, &mut function_context); + add_buffer("p148", 67108864, params_path, &mut function_context); + add_buffer("p149", 67108864, params_path, &mut function_context); + add_buffer("p150", 8192, params_path, &mut function_context); + add_buffer("p151", 16777216, params_path, &mut function_context); + add_buffer("p152", 16384, params_path, &mut function_context); + add_buffer("p153", 16384, params_path, &mut function_context); + add_buffer("p154", 16384, params_path, &mut 
function_context); + add_buffer("p155", 16384, params_path, &mut function_context); + add_buffer("p156", 4194304, params_path, &mut function_context); + add_buffer("p157", 8, params_path, &mut function_context); + add_buffer("p158", 16384, params_path, &mut function_context); + add_buffer("p159", 16384, params_path, &mut function_context); + add_buffer("p160", 16384, params_path, &mut function_context); + add_buffer("p161", 16384, params_path, &mut function_context); + add_buffer("p162", 16384, params_path, &mut function_context); + add_buffer("p163", 4194304, params_path, &mut function_context); + add_buffer("p164", 16777216, params_path, &mut function_context); + add_buffer("p165", 8192, params_path, &mut function_context); + add_buffer("p166", 67108864, params_path, &mut function_context); + add_buffer("p167", 67108864, params_path, &mut function_context); + add_buffer("p168", 67108864, params_path, &mut function_context); + add_buffer("p169", 8192, params_path, &mut function_context); + add_buffer("p170", 16777216, params_path, &mut function_context); + add_buffer("p171", 16384, params_path, &mut function_context); + add_buffer("p172", 16384, params_path, &mut function_context); + add_buffer("p173", 16384, params_path, &mut function_context); + add_buffer("p174", 16384, params_path, &mut function_context); + add_buffer("p175", 4194304, params_path, &mut function_context); + add_buffer("p176", 8, params_path, &mut function_context); + add_buffer("p177", 16384, params_path, &mut function_context); + add_buffer("p178", 16384, params_path, &mut function_context); + add_buffer("p179", 16384, params_path, &mut function_context); + add_buffer("p180", 16384, params_path, &mut function_context); + add_buffer("p181", 16384, params_path, &mut function_context); + add_buffer("p182", 4194304, params_path, &mut function_context); + add_buffer("p183", 16777216, params_path, &mut function_context); + add_buffer("p184", 8192, params_path, &mut function_context); + 
add_buffer("p185", 67108864, params_path, &mut function_context); + add_buffer("p186", 67108864, params_path, &mut function_context); + add_buffer("p187", 67108864, params_path, &mut function_context); + add_buffer("p188", 8192, params_path, &mut function_context); + add_buffer("p189", 16777216, params_path, &mut function_context); + add_buffer("p190", 16384, params_path, &mut function_context); + add_buffer("p191", 16384, params_path, &mut function_context); + add_buffer("p192", 16384, params_path, &mut function_context); + add_buffer("p193", 16384, params_path, &mut function_context); + add_buffer("p194", 4194304, params_path, &mut function_context); + add_buffer("p195", 8, params_path, &mut function_context); + add_buffer("p196", 16384, params_path, &mut function_context); + add_buffer("p197", 16384, params_path, &mut function_context); + add_buffer("p198", 16384, params_path, &mut function_context); + add_buffer("p199", 16384, params_path, &mut function_context); + add_buffer("p200", 16384, params_path, &mut function_context); + add_buffer("p201", 4194304, params_path, &mut function_context); + add_buffer("p202", 16777216, params_path, &mut function_context); + add_buffer("p203", 8192, params_path, &mut function_context); + add_buffer("p204", 67108864, params_path, &mut function_context); + add_buffer("p205", 67108864, params_path, &mut function_context); + add_buffer("p206", 67108864, params_path, &mut function_context); + add_buffer("p207", 8192, params_path, &mut function_context); + add_buffer("p208", 16777216, params_path, &mut function_context); + add_buffer("p209", 16384, params_path, &mut function_context); + add_buffer("p210", 16384, params_path, &mut function_context); + add_buffer("p211", 16384, params_path, &mut function_context); + add_buffer("p212", 16384, params_path, &mut function_context); + add_buffer("p213", 4194304, params_path, &mut function_context); + add_buffer("p214", 8, params_path, &mut function_context); + add_buffer("p215", 16384, 
params_path, &mut function_context); + add_buffer("p216", 16384, params_path, &mut function_context); + add_buffer("p217", 16384, params_path, &mut function_context); + add_buffer("p218", 16384, params_path, &mut function_context); + add_buffer("p219", 16384, params_path, &mut function_context); + add_buffer("p220", 4194304, params_path, &mut function_context); + add_buffer("p221", 16777216, params_path, &mut function_context); + add_buffer("p222", 8192, params_path, &mut function_context); + add_buffer("p223", 67108864, params_path, &mut function_context); + add_buffer("p224", 67108864, params_path, &mut function_context); + add_buffer("p225", 67108864, params_path, &mut function_context); + add_buffer("p226", 8192, params_path, &mut function_context); + add_buffer("p227", 16777216, params_path, &mut function_context); + add_buffer("p228", 16384, params_path, &mut function_context); + add_buffer("p229", 16384, params_path, &mut function_context); + add_buffer("p230", 16384, params_path, &mut function_context); + add_buffer("p231", 16384, params_path, &mut function_context); + add_buffer("p232", 4194304, params_path, &mut function_context); + add_buffer("p233", 8, params_path, &mut function_context); + add_buffer("p234", 16384, params_path, &mut function_context); + add_buffer("p235", 16384, params_path, &mut function_context); + add_buffer("p236", 16384, params_path, &mut function_context); + add_buffer("p237", 16384, params_path, &mut function_context); + add_buffer("p238", 16384, params_path, &mut function_context); + add_buffer("p239", 4194304, params_path, &mut function_context); + add_buffer("p240", 16777216, params_path, &mut function_context); + add_buffer("p241", 8192, params_path, &mut function_context); + add_buffer("p242", 67108864, params_path, &mut function_context); + add_buffer("p243", 67108864, params_path, &mut function_context); + add_buffer("p244", 67108864, params_path, &mut function_context); + add_buffer("p245", 8192, params_path, &mut 
function_context); + add_buffer("p246", 16777216, params_path, &mut function_context); + add_buffer("p247", 16384, params_path, &mut function_context); + add_buffer("p248", 16384, params_path, &mut function_context); + add_buffer("p249", 16384, params_path, &mut function_context); + add_buffer("p250", 16384, params_path, &mut function_context); + add_buffer("p251", 4194304, params_path, &mut function_context); + add_buffer("p252", 8, params_path, &mut function_context); + add_buffer("p253", 16384, params_path, &mut function_context); + add_buffer("p254", 16384, params_path, &mut function_context); + add_buffer("p255", 16384, params_path, &mut function_context); + add_buffer("p256", 16384, params_path, &mut function_context); + add_buffer("p257", 16384, params_path, &mut function_context); + add_buffer("p258", 4194304, params_path, &mut function_context); + add_buffer("p259", 16777216, params_path, &mut function_context); + add_buffer("p260", 8192, params_path, &mut function_context); + add_buffer("p261", 67108864, params_path, &mut function_context); + add_buffer("p262", 67108864, params_path, &mut function_context); + add_buffer("p263", 67108864, params_path, &mut function_context); + add_buffer("p264", 8192, params_path, &mut function_context); + add_buffer("p265", 16777216, params_path, &mut function_context); + add_buffer("p266", 16384, params_path, &mut function_context); + add_buffer("p267", 16384, params_path, &mut function_context); + add_buffer("p268", 16384, params_path, &mut function_context); + add_buffer("p269", 16384, params_path, &mut function_context); + add_buffer("p270", 4194304, params_path, &mut function_context); + add_buffer("p271", 8, params_path, &mut function_context); + add_buffer("p272", 16384, params_path, &mut function_context); + add_buffer("p273", 16384, params_path, &mut function_context); + add_buffer("p274", 16384, params_path, &mut function_context); + add_buffer("p275", 16384, params_path, &mut function_context); + 
add_buffer("p276", 16384, params_path, &mut function_context); + add_buffer("p277", 4194304, params_path, &mut function_context); + add_buffer("p278", 16777216, params_path, &mut function_context); + add_buffer("p279", 8192, params_path, &mut function_context); + add_buffer("p280", 67108864, params_path, &mut function_context); + add_buffer("p281", 67108864, params_path, &mut function_context); + add_buffer("p282", 67108864, params_path, &mut function_context); + add_buffer("p283", 8192, params_path, &mut function_context); + add_buffer("p284", 16777216, params_path, &mut function_context); + add_buffer("p285", 16384, params_path, &mut function_context); + add_buffer("p286", 16384, params_path, &mut function_context); + add_buffer("p287", 16384, params_path, &mut function_context); + add_buffer("p288", 16384, params_path, &mut function_context); + add_buffer("p289", 4194304, params_path, &mut function_context); + add_buffer("p290", 8, params_path, &mut function_context); + add_buffer("p291", 16384, params_path, &mut function_context); + add_buffer("p292", 16384, params_path, &mut function_context); + add_buffer("p293", 16384, params_path, &mut function_context); + add_buffer("p294", 16384, params_path, &mut function_context); + add_buffer("p295", 16384, params_path, &mut function_context); + add_buffer("p296", 4194304, params_path, &mut function_context); + add_buffer("p297", 16777216, params_path, &mut function_context); + add_buffer("p298", 8192, params_path, &mut function_context); + add_buffer("p299", 67108864, params_path, &mut function_context); + add_buffer("p300", 67108864, params_path, &mut function_context); + add_buffer("p301", 67108864, params_path, &mut function_context); + add_buffer("p302", 8192, params_path, &mut function_context); + add_buffer("p303", 16777216, params_path, &mut function_context); + add_buffer("p304", 16384, params_path, &mut function_context); + add_buffer("p305", 16384, params_path, &mut function_context); + add_buffer("p306", 
16384, params_path, &mut function_context); + add_buffer("p307", 16384, params_path, &mut function_context); + add_buffer("p308", 4194304, params_path, &mut function_context); + add_buffer("p309", 8, params_path, &mut function_context); + add_buffer("p310", 16384, params_path, &mut function_context); + add_buffer("p311", 16384, params_path, &mut function_context); + add_buffer("p312", 16384, params_path, &mut function_context); + add_buffer("p313", 16384, params_path, &mut function_context); + add_buffer("p314", 16384, params_path, &mut function_context); + add_buffer("p315", 4194304, params_path, &mut function_context); + add_buffer("p316", 16777216, params_path, &mut function_context); + add_buffer("p317", 8192, params_path, &mut function_context); + add_buffer("p318", 67108864, params_path, &mut function_context); + add_buffer("p319", 67108864, params_path, &mut function_context); + add_buffer("p320", 67108864, params_path, &mut function_context); + add_buffer("p321", 8192, params_path, &mut function_context); + add_buffer("p322", 1050673152, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 1048576; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_vit_b_16(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/vit_b_16/"; + + add_buffer("input", 602112, params_path, &mut function_context); + add_buffer("p0", 2359296, params_path, &mut function_context); + add_buffer("p1", 3072, params_path, &mut function_context); + add_buffer("p2", 3072, params_path, &mut function_context); + add_buffer("p3", 605184, params_path, &mut function_context); + add_buffer("p4", 3072, params_path, &mut function_context); + add_buffer("p5", 3072, params_path, &mut function_context); + add_buffer("p6", 7077888, 
params_path, &mut function_context); + add_buffer("p7", 9216, params_path, &mut function_context); + add_buffer("p8", 8, params_path, &mut function_context); + add_buffer("p9", 4, params_path, &mut function_context); + add_buffer("p10", 8, params_path, &mut function_context); + add_buffer("p11", 4, params_path, &mut function_context); + add_buffer("p12", 8, params_path, &mut function_context); + add_buffer("p13", 2359296, params_path, &mut function_context); + add_buffer("p14", 3072, params_path, &mut function_context); + add_buffer("p15", 3072, params_path, &mut function_context); + add_buffer("p16", 3072, params_path, &mut function_context); + add_buffer("p17", 9437184, params_path, &mut function_context); + add_buffer("p18", 12288, params_path, &mut function_context); + add_buffer("p19", 9437184, params_path, &mut function_context); + add_buffer("p20", 3072, params_path, &mut function_context); + add_buffer("p21", 3072, params_path, &mut function_context); + add_buffer("p22", 3072, params_path, &mut function_context); + add_buffer("p23", 7077888, params_path, &mut function_context); + add_buffer("p24", 9216, params_path, &mut function_context); + add_buffer("p25", 4, params_path, &mut function_context); + add_buffer("p26", 4, params_path, &mut function_context); + add_buffer("p27", 2359296, params_path, &mut function_context); + add_buffer("p28", 3072, params_path, &mut function_context); + add_buffer("p29", 3072, params_path, &mut function_context); + add_buffer("p30", 3072, params_path, &mut function_context); + add_buffer("p31", 9437184, params_path, &mut function_context); + add_buffer("p32", 12288, params_path, &mut function_context); + add_buffer("p33", 9437184, params_path, &mut function_context); + add_buffer("p34", 3072, params_path, &mut function_context); + add_buffer("p35", 3072, params_path, &mut function_context); + add_buffer("p36", 3072, params_path, &mut function_context); + add_buffer("p37", 7077888, params_path, &mut function_context); + 
add_buffer("p38", 9216, params_path, &mut function_context); + add_buffer("p39", 4, params_path, &mut function_context); + add_buffer("p40", 4, params_path, &mut function_context); + add_buffer("p41", 2359296, params_path, &mut function_context); + add_buffer("p42", 3072, params_path, &mut function_context); + add_buffer("p43", 3072, params_path, &mut function_context); + add_buffer("p44", 3072, params_path, &mut function_context); + add_buffer("p45", 9437184, params_path, &mut function_context); + add_buffer("p46", 12288, params_path, &mut function_context); + add_buffer("p47", 9437184, params_path, &mut function_context); + add_buffer("p48", 3072, params_path, &mut function_context); + add_buffer("p49", 3072, params_path, &mut function_context); + add_buffer("p50", 3072, params_path, &mut function_context); + add_buffer("p51", 7077888, params_path, &mut function_context); + add_buffer("p52", 9216, params_path, &mut function_context); + add_buffer("p53", 4, params_path, &mut function_context); + add_buffer("p54", 4, params_path, &mut function_context); + add_buffer("p55", 2359296, params_path, &mut function_context); + add_buffer("p56", 3072, params_path, &mut function_context); + add_buffer("p57", 3072, params_path, &mut function_context); + add_buffer("p58", 3072, params_path, &mut function_context); + add_buffer("p59", 9437184, params_path, &mut function_context); + add_buffer("p60", 12288, params_path, &mut function_context); + add_buffer("p61", 9437184, params_path, &mut function_context); + add_buffer("p62", 3072, params_path, &mut function_context); + add_buffer("p63", 3072, params_path, &mut function_context); + add_buffer("p64", 3072, params_path, &mut function_context); + add_buffer("p65", 7077888, params_path, &mut function_context); + add_buffer("p66", 9216, params_path, &mut function_context); + add_buffer("p67", 4, params_path, &mut function_context); + add_buffer("p68", 4, params_path, &mut function_context); + add_buffer("p69", 2359296, 
params_path, &mut function_context); + add_buffer("p70", 3072, params_path, &mut function_context); + add_buffer("p71", 3072, params_path, &mut function_context); + add_buffer("p72", 3072, params_path, &mut function_context); + add_buffer("p73", 9437184, params_path, &mut function_context); + add_buffer("p74", 12288, params_path, &mut function_context); + add_buffer("p75", 9437184, params_path, &mut function_context); + add_buffer("p76", 3072, params_path, &mut function_context); + add_buffer("p77", 3072, params_path, &mut function_context); + add_buffer("p78", 3072, params_path, &mut function_context); + add_buffer("p79", 7077888, params_path, &mut function_context); + add_buffer("p80", 9216, params_path, &mut function_context); + add_buffer("p81", 4, params_path, &mut function_context); + add_buffer("p82", 4, params_path, &mut function_context); + add_buffer("p83", 2359296, params_path, &mut function_context); + add_buffer("p84", 3072, params_path, &mut function_context); + add_buffer("p85", 3072, params_path, &mut function_context); + add_buffer("p86", 3072, params_path, &mut function_context); + add_buffer("p87", 9437184, params_path, &mut function_context); + add_buffer("p88", 12288, params_path, &mut function_context); + add_buffer("p89", 9437184, params_path, &mut function_context); + add_buffer("p90", 3072, params_path, &mut function_context); + add_buffer("p91", 3072, params_path, &mut function_context); + add_buffer("p92", 3072, params_path, &mut function_context); + add_buffer("p93", 7077888, params_path, &mut function_context); + add_buffer("p94", 9216, params_path, &mut function_context); + add_buffer("p95", 4, params_path, &mut function_context); + add_buffer("p96", 4, params_path, &mut function_context); + add_buffer("p97", 2359296, params_path, &mut function_context); + add_buffer("p98", 3072, params_path, &mut function_context); + add_buffer("p99", 3072, params_path, &mut function_context); + add_buffer("p100", 3072, params_path, &mut 
function_context); + add_buffer("p101", 9437184, params_path, &mut function_context); + add_buffer("p102", 12288, params_path, &mut function_context); + add_buffer("p103", 9437184, params_path, &mut function_context); + add_buffer("p104", 3072, params_path, &mut function_context); + add_buffer("p105", 3072, params_path, &mut function_context); + add_buffer("p106", 3072, params_path, &mut function_context); + add_buffer("p107", 7077888, params_path, &mut function_context); + add_buffer("p108", 9216, params_path, &mut function_context); + add_buffer("p109", 4, params_path, &mut function_context); + add_buffer("p110", 4, params_path, &mut function_context); + add_buffer("p111", 2359296, params_path, &mut function_context); + add_buffer("p112", 3072, params_path, &mut function_context); + add_buffer("p113", 3072, params_path, &mut function_context); + add_buffer("p114", 3072, params_path, &mut function_context); + add_buffer("p115", 9437184, params_path, &mut function_context); + add_buffer("p116", 12288, params_path, &mut function_context); + add_buffer("p117", 9437184, params_path, &mut function_context); + add_buffer("p118", 3072, params_path, &mut function_context); + add_buffer("p119", 3072, params_path, &mut function_context); + add_buffer("p120", 3072, params_path, &mut function_context); + add_buffer("p121", 7077888, params_path, &mut function_context); + add_buffer("p122", 9216, params_path, &mut function_context); + add_buffer("p123", 4, params_path, &mut function_context); + add_buffer("p124", 4, params_path, &mut function_context); + add_buffer("p125", 2359296, params_path, &mut function_context); + add_buffer("p126", 3072, params_path, &mut function_context); + add_buffer("p127", 3072, params_path, &mut function_context); + add_buffer("p128", 3072, params_path, &mut function_context); + add_buffer("p129", 9437184, params_path, &mut function_context); + add_buffer("p130", 12288, params_path, &mut function_context); + add_buffer("p131", 9437184, params_path, 
&mut function_context); + add_buffer("p132", 3072, params_path, &mut function_context); + add_buffer("p133", 3072, params_path, &mut function_context); + add_buffer("p134", 3072, params_path, &mut function_context); + add_buffer("p135", 7077888, params_path, &mut function_context); + add_buffer("p136", 9216, params_path, &mut function_context); + add_buffer("p137", 4, params_path, &mut function_context); + add_buffer("p138", 4, params_path, &mut function_context); + add_buffer("p139", 2359296, params_path, &mut function_context); + add_buffer("p140", 3072, params_path, &mut function_context); + add_buffer("p141", 3072, params_path, &mut function_context); + add_buffer("p142", 3072, params_path, &mut function_context); + add_buffer("p143", 9437184, params_path, &mut function_context); + add_buffer("p144", 12288, params_path, &mut function_context); + add_buffer("p145", 9437184, params_path, &mut function_context); + add_buffer("p146", 3072, params_path, &mut function_context); + add_buffer("p147", 3072, params_path, &mut function_context); + add_buffer("p148", 3072, params_path, &mut function_context); + add_buffer("p149", 7077888, params_path, &mut function_context); + add_buffer("p150", 9216, params_path, &mut function_context); + add_buffer("p151", 4, params_path, &mut function_context); + add_buffer("p152", 4, params_path, &mut function_context); + add_buffer("p153", 2359296, params_path, &mut function_context); + add_buffer("p154", 3072, params_path, &mut function_context); + add_buffer("p155", 3072, params_path, &mut function_context); + add_buffer("p156", 3072, params_path, &mut function_context); + add_buffer("p157", 9437184, params_path, &mut function_context); + add_buffer("p158", 12288, params_path, &mut function_context); + add_buffer("p159", 9437184, params_path, &mut function_context); + add_buffer("p160", 3072, params_path, &mut function_context); + add_buffer("p161", 3072, params_path, &mut function_context); + add_buffer("p162", 3072, params_path, 
&mut function_context); + add_buffer("p163", 7077888, params_path, &mut function_context); + add_buffer("p164", 9216, params_path, &mut function_context); + add_buffer("p165", 4, params_path, &mut function_context); + add_buffer("p166", 4, params_path, &mut function_context); + add_buffer("p167", 2359296, params_path, &mut function_context); + add_buffer("p168", 3072, params_path, &mut function_context); + add_buffer("p169", 3072, params_path, &mut function_context); + add_buffer("p170", 3072, params_path, &mut function_context); + add_buffer("p171", 9437184, params_path, &mut function_context); + add_buffer("p172", 12288, params_path, &mut function_context); + add_buffer("p173", 9437184, params_path, &mut function_context); + add_buffer("p174", 3072, params_path, &mut function_context); + add_buffer("p175", 3072, params_path, &mut function_context); + add_buffer("p176", 3072, params_path, &mut function_context); + add_buffer("p177", 3072000, params_path, &mut function_context); + add_buffer("p178", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_resnet34(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet34/"; + + add_buffer("input", 602112, params_path, &mut function_context); + add_buffer("p0", 37632, params_path, &mut function_context); + add_buffer("p1", 256, params_path, &mut function_context); + add_buffer("p2", 589824, params_path, &mut function_context); + add_buffer("p3", 256, params_path, &mut function_context); + add_buffer("p4", 589824, params_path, &mut function_context); + add_buffer("p5", 256, params_path, &mut function_context); + add_buffer("p6", 589824, params_path, &mut function_context); + add_buffer("p7", 
256, params_path, &mut function_context); + add_buffer("p8", 589824, params_path, &mut function_context); + add_buffer("p9", 256, params_path, &mut function_context); + add_buffer("p10", 589824, params_path, &mut function_context); + add_buffer("p11", 256, params_path, &mut function_context); + add_buffer("p12", 589824, params_path, &mut function_context); + add_buffer("p13", 256, params_path, &mut function_context); + add_buffer("p14", 294912, params_path, &mut function_context); + add_buffer("p15", 512, params_path, &mut function_context); + add_buffer("p16", 1048576, params_path, &mut function_context); + add_buffer("p17", 512, params_path, &mut function_context); + add_buffer("p18", 32768, params_path, &mut function_context); + add_buffer("p19", 512, params_path, &mut function_context); + add_buffer("p20", 1048576, params_path, &mut function_context); + add_buffer("p21", 512, params_path, &mut function_context); + add_buffer("p22", 1048576, params_path, &mut function_context); + add_buffer("p23", 512, params_path, &mut function_context); + add_buffer("p24", 1048576, params_path, &mut function_context); + add_buffer("p25", 512, params_path, &mut function_context); + add_buffer("p26", 1048576, params_path, &mut function_context); + add_buffer("p27", 512, params_path, &mut function_context); + add_buffer("p28", 1048576, params_path, &mut function_context); + add_buffer("p29", 512, params_path, &mut function_context); + add_buffer("p30", 1048576, params_path, &mut function_context); + add_buffer("p31", 512, params_path, &mut function_context); + add_buffer("p32", 1179648, params_path, &mut function_context); + add_buffer("p33", 1024, params_path, &mut function_context); + add_buffer("p34", 4194304, params_path, &mut function_context); + add_buffer("p35", 1024, params_path, &mut function_context); + add_buffer("p36", 131072, params_path, &mut function_context); + add_buffer("p37", 1024, params_path, &mut function_context); + add_buffer("p38", 4194304, params_path, 
&mut function_context); + add_buffer("p39", 1024, params_path, &mut function_context); + add_buffer("p40", 4194304, params_path, &mut function_context); + add_buffer("p41", 1024, params_path, &mut function_context); + add_buffer("p42", 4194304, params_path, &mut function_context); + add_buffer("p43", 1024, params_path, &mut function_context); + add_buffer("p44", 4194304, params_path, &mut function_context); + add_buffer("p45", 1024, params_path, &mut function_context); + add_buffer("p46", 4194304, params_path, &mut function_context); + add_buffer("p47", 1024, params_path, &mut function_context); + add_buffer("p48", 4194304, params_path, &mut function_context); + add_buffer("p49", 1024, params_path, &mut function_context); + add_buffer("p50", 4194304, params_path, &mut function_context); + add_buffer("p51", 1024, params_path, &mut function_context); + add_buffer("p52", 4194304, params_path, &mut function_context); + add_buffer("p53", 1024, params_path, &mut function_context); + add_buffer("p54", 4194304, params_path, &mut function_context); + add_buffer("p55", 1024, params_path, &mut function_context); + add_buffer("p56", 4194304, params_path, &mut function_context); + add_buffer("p57", 1024, params_path, &mut function_context); + add_buffer("p58", 4718592, params_path, &mut function_context); + add_buffer("p59", 2048, params_path, &mut function_context); + add_buffer("p60", 16777216, params_path, &mut function_context); + add_buffer("p61", 2048, params_path, &mut function_context); + add_buffer("p62", 524288, params_path, &mut function_context); + add_buffer("p63", 2048, params_path, &mut function_context); + add_buffer("p64", 16777216, params_path, &mut function_context); + add_buffer("p65", 2048, params_path, &mut function_context); + add_buffer("p66", 16777216, params_path, &mut function_context); + add_buffer("p67", 2048, params_path, &mut function_context); + add_buffer("p68", 16777216, params_path, &mut function_context); + add_buffer("p69", 2048, 
params_path, &mut function_context); + add_buffer("p70", 16777216, params_path, &mut function_context); + add_buffer("p71", 2048, params_path, &mut function_context); + add_buffer("p72", 2048000, params_path, &mut function_context); + add_buffer("p73", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_resnet50(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet50/"; + + add_buffer("input", 602112, params_path, &mut function_context); + add_buffer("p0", 37632, params_path, &mut function_context); + add_buffer("p1", 256, params_path, &mut function_context); + add_buffer("p2", 16384, params_path, &mut function_context); + add_buffer("p3", 256, params_path, &mut function_context); + add_buffer("p4", 589824, params_path, &mut function_context); + add_buffer("p5", 256, params_path, &mut function_context); + add_buffer("p6", 65536, params_path, &mut function_context); + add_buffer("p7", 1024, params_path, &mut function_context); + add_buffer("p8", 65536, params_path, &mut function_context); + add_buffer("p9", 1024, params_path, &mut function_context); + add_buffer("p10", 65536, params_path, &mut function_context); + add_buffer("p11", 256, params_path, &mut function_context); + add_buffer("p12", 589824, params_path, &mut function_context); + add_buffer("p13", 256, params_path, &mut function_context); + add_buffer("p14", 65536, params_path, &mut function_context); + add_buffer("p15", 1024, params_path, &mut function_context); + add_buffer("p16", 65536, params_path, &mut function_context); + add_buffer("p17", 256, params_path, &mut function_context); + add_buffer("p18", 589824, params_path, &mut function_context); + add_buffer("p19", 256, 
params_path, &mut function_context); + add_buffer("p20", 65536, params_path, &mut function_context); + add_buffer("p21", 1024, params_path, &mut function_context); + add_buffer("p22", 131072, params_path, &mut function_context); + add_buffer("p23", 512, params_path, &mut function_context); + add_buffer("p24", 589824, params_path, &mut function_context); + add_buffer("p25", 512, params_path, &mut function_context); + add_buffer("p26", 262144, params_path, &mut function_context); + add_buffer("p27", 2048, params_path, &mut function_context); + add_buffer("p28", 524288, params_path, &mut function_context); + add_buffer("p29", 2048, params_path, &mut function_context); + add_buffer("p30", 262144, params_path, &mut function_context); + add_buffer("p31", 512, params_path, &mut function_context); + add_buffer("p32", 1048576, params_path, &mut function_context); + add_buffer("p33", 512, params_path, &mut function_context); + add_buffer("p34", 262144, params_path, &mut function_context); + add_buffer("p35", 2048, params_path, &mut function_context); + add_buffer("p36", 262144, params_path, &mut function_context); + add_buffer("p37", 512, params_path, &mut function_context); + add_buffer("p38", 1048576, params_path, &mut function_context); + add_buffer("p39", 512, params_path, &mut function_context); + add_buffer("p40", 262144, params_path, &mut function_context); + add_buffer("p41", 2048, params_path, &mut function_context); + add_buffer("p42", 262144, params_path, &mut function_context); + add_buffer("p43", 512, params_path, &mut function_context); + add_buffer("p44", 1048576, params_path, &mut function_context); + add_buffer("p45", 512, params_path, &mut function_context); + add_buffer("p46", 262144, params_path, &mut function_context); + add_buffer("p47", 2048, params_path, &mut function_context); + add_buffer("p48", 524288, params_path, &mut function_context); + add_buffer("p49", 1024, params_path, &mut function_context); + add_buffer("p50", 2359296, params_path, &mut 
function_context); + add_buffer("p51", 1024, params_path, &mut function_context); + add_buffer("p52", 1048576, params_path, &mut function_context); + add_buffer("p53", 4096, params_path, &mut function_context); + add_buffer("p54", 2097152, params_path, &mut function_context); + add_buffer("p55", 4096, params_path, &mut function_context); + add_buffer("p56", 1048576, params_path, &mut function_context); + add_buffer("p57", 1024, params_path, &mut function_context); + add_buffer("p58", 4194304, params_path, &mut function_context); + add_buffer("p59", 1024, params_path, &mut function_context); + add_buffer("p60", 1048576, params_path, &mut function_context); + add_buffer("p61", 4096, params_path, &mut function_context); + add_buffer("p62", 1048576, params_path, &mut function_context); + add_buffer("p63", 1024, params_path, &mut function_context); + add_buffer("p64", 4194304, params_path, &mut function_context); + add_buffer("p65", 1024, params_path, &mut function_context); + add_buffer("p66", 1048576, params_path, &mut function_context); + add_buffer("p67", 4096, params_path, &mut function_context); + add_buffer("p68", 1048576, params_path, &mut function_context); + add_buffer("p69", 1024, params_path, &mut function_context); + add_buffer("p70", 4194304, params_path, &mut function_context); + add_buffer("p71", 1024, params_path, &mut function_context); + add_buffer("p72", 1048576, params_path, &mut function_context); + add_buffer("p73", 4096, params_path, &mut function_context); + add_buffer("p74", 1048576, params_path, &mut function_context); + add_buffer("p75", 1024, params_path, &mut function_context); + add_buffer("p76", 4194304, params_path, &mut function_context); + add_buffer("p77", 1024, params_path, &mut function_context); + add_buffer("p78", 1048576, params_path, &mut function_context); + add_buffer("p79", 4096, params_path, &mut function_context); + add_buffer("p80", 1048576, params_path, &mut function_context); + add_buffer("p81", 1024, params_path, &mut 
function_context); + add_buffer("p82", 4194304, params_path, &mut function_context); + add_buffer("p83", 1024, params_path, &mut function_context); + add_buffer("p84", 1048576, params_path, &mut function_context); + add_buffer("p85", 4096, params_path, &mut function_context); + add_buffer("p86", 2097152, params_path, &mut function_context); + add_buffer("p87", 2048, params_path, &mut function_context); + add_buffer("p88", 9437184, params_path, &mut function_context); + add_buffer("p89", 2048, params_path, &mut function_context); + add_buffer("p90", 4194304, params_path, &mut function_context); + add_buffer("p91", 8192, params_path, &mut function_context); + add_buffer("p92", 8388608, params_path, &mut function_context); + add_buffer("p93", 8192, params_path, &mut function_context); + add_buffer("p94", 4194304, params_path, &mut function_context); + add_buffer("p95", 2048, params_path, &mut function_context); + add_buffer("p96", 16777216, params_path, &mut function_context); + add_buffer("p97", 2048, params_path, &mut function_context); + add_buffer("p98", 4194304, params_path, &mut function_context); + add_buffer("p99", 8192, params_path, &mut function_context); + add_buffer("p100", 4194304, params_path, &mut function_context); + add_buffer("p101", 2048, params_path, &mut function_context); + add_buffer("p102", 16777216, params_path, &mut function_context); + add_buffer("p103", 2048, params_path, &mut function_context); + add_buffer("p104", 4194304, params_path, &mut function_context); + add_buffer("p105", 8192, params_path, &mut function_context); + add_buffer("p106", 8192000, params_path, &mut function_context); + add_buffer("p107", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_resnet101(mut function_context: Context) 
-> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet101/"; + + add_buffer("input", 602112, params_path, &mut function_context); + add_buffer("p0", 37632, params_path, &mut function_context); + add_buffer("p1", 256, params_path, &mut function_context); + add_buffer("p2", 16384, params_path, &mut function_context); + add_buffer("p3", 256, params_path, &mut function_context); + add_buffer("p4", 589824, params_path, &mut function_context); + add_buffer("p5", 256, params_path, &mut function_context); + add_buffer("p6", 65536, params_path, &mut function_context); + add_buffer("p7", 1024, params_path, &mut function_context); + add_buffer("p8", 65536, params_path, &mut function_context); + add_buffer("p9", 1024, params_path, &mut function_context); + add_buffer("p10", 65536, params_path, &mut function_context); + add_buffer("p11", 256, params_path, &mut function_context); + add_buffer("p12", 589824, params_path, &mut function_context); + add_buffer("p13", 256, params_path, &mut function_context); + add_buffer("p14", 65536, params_path, &mut function_context); + add_buffer("p15", 1024, params_path, &mut function_context); + add_buffer("p16", 65536, params_path, &mut function_context); + add_buffer("p17", 256, params_path, &mut function_context); + add_buffer("p18", 589824, params_path, &mut function_context); + add_buffer("p19", 256, params_path, &mut function_context); + add_buffer("p20", 65536, params_path, &mut function_context); + add_buffer("p21", 1024, params_path, &mut function_context); + add_buffer("p22", 131072, params_path, &mut function_context); + add_buffer("p23", 512, params_path, &mut function_context); + add_buffer("p24", 589824, params_path, &mut function_context); + add_buffer("p25", 512, params_path, &mut function_context); + add_buffer("p26", 262144, params_path, &mut function_context); + add_buffer("p27", 2048, params_path, &mut function_context); + add_buffer("p28", 524288, params_path, 
&mut function_context); + add_buffer("p29", 2048, params_path, &mut function_context); + add_buffer("p30", 262144, params_path, &mut function_context); + add_buffer("p31", 512, params_path, &mut function_context); + add_buffer("p32", 1048576, params_path, &mut function_context); + add_buffer("p33", 512, params_path, &mut function_context); + add_buffer("p34", 262144, params_path, &mut function_context); + add_buffer("p35", 2048, params_path, &mut function_context); + add_buffer("p36", 262144, params_path, &mut function_context); + add_buffer("p37", 512, params_path, &mut function_context); + add_buffer("p38", 1048576, params_path, &mut function_context); + add_buffer("p39", 512, params_path, &mut function_context); + add_buffer("p40", 262144, params_path, &mut function_context); + add_buffer("p41", 2048, params_path, &mut function_context); + add_buffer("p42", 262144, params_path, &mut function_context); + add_buffer("p43", 512, params_path, &mut function_context); + add_buffer("p44", 1048576, params_path, &mut function_context); + add_buffer("p45", 512, params_path, &mut function_context); + add_buffer("p46", 262144, params_path, &mut function_context); + add_buffer("p47", 2048, params_path, &mut function_context); + add_buffer("p48", 524288, params_path, &mut function_context); + add_buffer("p49", 1024, params_path, &mut function_context); + add_buffer("p50", 2359296, params_path, &mut function_context); + add_buffer("p51", 1024, params_path, &mut function_context); + add_buffer("p52", 1048576, params_path, &mut function_context); + add_buffer("p53", 4096, params_path, &mut function_context); + add_buffer("p54", 2097152, params_path, &mut function_context); + add_buffer("p55", 4096, params_path, &mut function_context); + add_buffer("p56", 1048576, params_path, &mut function_context); + add_buffer("p57", 1024, params_path, &mut function_context); + add_buffer("p58", 4194304, params_path, &mut function_context); + add_buffer("p59", 1024, params_path, &mut 
function_context); + add_buffer("p60", 1048576, params_path, &mut function_context); + add_buffer("p61", 4096, params_path, &mut function_context); + add_buffer("p62", 1048576, params_path, &mut function_context); + add_buffer("p63", 1024, params_path, &mut function_context); + add_buffer("p64", 4194304, params_path, &mut function_context); + add_buffer("p65", 1024, params_path, &mut function_context); + add_buffer("p66", 1048576, params_path, &mut function_context); + add_buffer("p67", 4096, params_path, &mut function_context); + add_buffer("p68", 1048576, params_path, &mut function_context); + add_buffer("p69", 1024, params_path, &mut function_context); + add_buffer("p70", 4194304, params_path, &mut function_context); + add_buffer("p71", 1024, params_path, &mut function_context); + add_buffer("p72", 1048576, params_path, &mut function_context); + add_buffer("p73", 4096, params_path, &mut function_context); + add_buffer("p74", 1048576, params_path, &mut function_context); + add_buffer("p75", 1024, params_path, &mut function_context); + add_buffer("p76", 4194304, params_path, &mut function_context); + add_buffer("p77", 1024, params_path, &mut function_context); + add_buffer("p78", 1048576, params_path, &mut function_context); + add_buffer("p79", 4096, params_path, &mut function_context); + add_buffer("p80", 1048576, params_path, &mut function_context); + add_buffer("p81", 1024, params_path, &mut function_context); + add_buffer("p82", 4194304, params_path, &mut function_context); + add_buffer("p83", 1024, params_path, &mut function_context); + add_buffer("p84", 1048576, params_path, &mut function_context); + add_buffer("p85", 4096, params_path, &mut function_context); + add_buffer("p86", 1048576, params_path, &mut function_context); + add_buffer("p87", 1024, params_path, &mut function_context); + add_buffer("p88", 4194304, params_path, &mut function_context); + add_buffer("p89", 1024, params_path, &mut function_context); + add_buffer("p90", 1048576, params_path, 
&mut function_context); + add_buffer("p91", 4096, params_path, &mut function_context); + add_buffer("p92", 1048576, params_path, &mut function_context); + add_buffer("p93", 1024, params_path, &mut function_context); + add_buffer("p94", 4194304, params_path, &mut function_context); + add_buffer("p95", 1024, params_path, &mut function_context); + add_buffer("p96", 1048576, params_path, &mut function_context); + add_buffer("p97", 4096, params_path, &mut function_context); + add_buffer("p98", 1048576, params_path, &mut function_context); + add_buffer("p99", 1024, params_path, &mut function_context); + add_buffer("p100", 4194304, params_path, &mut function_context); + add_buffer("p101", 1024, params_path, &mut function_context); + add_buffer("p102", 1048576, params_path, &mut function_context); + add_buffer("p103", 4096, params_path, &mut function_context); + add_buffer("p104", 1048576, params_path, &mut function_context); + add_buffer("p105", 1024, params_path, &mut function_context); + add_buffer("p106", 4194304, params_path, &mut function_context); + add_buffer("p107", 1024, params_path, &mut function_context); + add_buffer("p108", 1048576, params_path, &mut function_context); + add_buffer("p109", 4096, params_path, &mut function_context); + add_buffer("p110", 1048576, params_path, &mut function_context); + add_buffer("p111", 1024, params_path, &mut function_context); + add_buffer("p112", 4194304, params_path, &mut function_context); + add_buffer("p113", 1024, params_path, &mut function_context); + add_buffer("p114", 1048576, params_path, &mut function_context); + add_buffer("p115", 4096, params_path, &mut function_context); + add_buffer("p116", 1048576, params_path, &mut function_context); + add_buffer("p117", 1024, params_path, &mut function_context); + add_buffer("p118", 4194304, params_path, &mut function_context); + add_buffer("p119", 1024, params_path, &mut function_context); + add_buffer("p120", 1048576, params_path, &mut function_context); + 
add_buffer("p121", 4096, params_path, &mut function_context); + add_buffer("p122", 1048576, params_path, &mut function_context); + add_buffer("p123", 1024, params_path, &mut function_context); + add_buffer("p124", 4194304, params_path, &mut function_context); + add_buffer("p125", 1024, params_path, &mut function_context); + add_buffer("p126", 1048576, params_path, &mut function_context); + add_buffer("p127", 4096, params_path, &mut function_context); + add_buffer("p128", 1048576, params_path, &mut function_context); + add_buffer("p129", 1024, params_path, &mut function_context); + add_buffer("p130", 4194304, params_path, &mut function_context); + add_buffer("p131", 1024, params_path, &mut function_context); + add_buffer("p132", 1048576, params_path, &mut function_context); + add_buffer("p133", 4096, params_path, &mut function_context); + add_buffer("p134", 1048576, params_path, &mut function_context); + add_buffer("p135", 1024, params_path, &mut function_context); + add_buffer("p136", 4194304, params_path, &mut function_context); + add_buffer("p137", 1024, params_path, &mut function_context); + add_buffer("p138", 1048576, params_path, &mut function_context); + add_buffer("p139", 4096, params_path, &mut function_context); + add_buffer("p140", 1048576, params_path, &mut function_context); + add_buffer("p141", 1024, params_path, &mut function_context); + add_buffer("p142", 4194304, params_path, &mut function_context); + add_buffer("p143", 1024, params_path, &mut function_context); + add_buffer("p144", 1048576, params_path, &mut function_context); + add_buffer("p145", 4096, params_path, &mut function_context); + add_buffer("p146", 1048576, params_path, &mut function_context); + add_buffer("p147", 1024, params_path, &mut function_context); + add_buffer("p148", 4194304, params_path, &mut function_context); + add_buffer("p149", 1024, params_path, &mut function_context); + add_buffer("p150", 1048576, params_path, &mut function_context); + add_buffer("p151", 4096, 
params_path, &mut function_context); + add_buffer("p152", 1048576, params_path, &mut function_context); + add_buffer("p153", 1024, params_path, &mut function_context); + add_buffer("p154", 4194304, params_path, &mut function_context); + add_buffer("p155", 1024, params_path, &mut function_context); + add_buffer("p156", 1048576, params_path, &mut function_context); + add_buffer("p157", 4096, params_path, &mut function_context); + add_buffer("p158", 1048576, params_path, &mut function_context); + add_buffer("p159", 1024, params_path, &mut function_context); + add_buffer("p160", 4194304, params_path, &mut function_context); + add_buffer("p161", 1024, params_path, &mut function_context); + add_buffer("p162", 1048576, params_path, &mut function_context); + add_buffer("p163", 4096, params_path, &mut function_context); + add_buffer("p164", 1048576, params_path, &mut function_context); + add_buffer("p165", 1024, params_path, &mut function_context); + add_buffer("p166", 4194304, params_path, &mut function_context); + add_buffer("p167", 1024, params_path, &mut function_context); + add_buffer("p168", 1048576, params_path, &mut function_context); + add_buffer("p169", 4096, params_path, &mut function_context); + add_buffer("p170", 1048576, params_path, &mut function_context); + add_buffer("p171", 1024, params_path, &mut function_context); + add_buffer("p172", 4194304, params_path, &mut function_context); + add_buffer("p173", 1024, params_path, &mut function_context); + add_buffer("p174", 1048576, params_path, &mut function_context); + add_buffer("p175", 4096, params_path, &mut function_context); + add_buffer("p176", 1048576, params_path, &mut function_context); + add_buffer("p177", 1024, params_path, &mut function_context); + add_buffer("p178", 4194304, params_path, &mut function_context); + add_buffer("p179", 1024, params_path, &mut function_context); + add_buffer("p180", 1048576, params_path, &mut function_context); + add_buffer("p181", 4096, params_path, &mut 
function_context); + add_buffer("p182", 1048576, params_path, &mut function_context); + add_buffer("p183", 1024, params_path, &mut function_context); + add_buffer("p184", 4194304, params_path, &mut function_context); + add_buffer("p185", 1024, params_path, &mut function_context); + add_buffer("p186", 1048576, params_path, &mut function_context); + add_buffer("p187", 4096, params_path, &mut function_context); + add_buffer("p188", 2097152, params_path, &mut function_context); + add_buffer("p189", 2048, params_path, &mut function_context); + add_buffer("p190", 9437184, params_path, &mut function_context); + add_buffer("p191", 2048, params_path, &mut function_context); + add_buffer("p192", 4194304, params_path, &mut function_context); + add_buffer("p193", 8192, params_path, &mut function_context); + add_buffer("p194", 8388608, params_path, &mut function_context); + add_buffer("p195", 8192, params_path, &mut function_context); + add_buffer("p196", 4194304, params_path, &mut function_context); + add_buffer("p197", 2048, params_path, &mut function_context); + add_buffer("p198", 16777216, params_path, &mut function_context); + add_buffer("p199", 2048, params_path, &mut function_context); + add_buffer("p200", 4194304, params_path, &mut function_context); + add_buffer("p201", 8192, params_path, &mut function_context); + add_buffer("p202", 4194304, params_path, &mut function_context); + add_buffer("p203", 2048, params_path, &mut function_context); + add_buffer("p204", 16777216, params_path, &mut function_context); + add_buffer("p205", 2048, params_path, &mut function_context); + add_buffer("p206", 4194304, params_path, &mut function_context); + add_buffer("p207", 8192, params_path, &mut function_context); + add_buffer("p208", 8192000, params_path, &mut function_context); + add_buffer("p209", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, 
params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_resnet152(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet152/"; + + add_buffer("input", 602112, params_path, &mut function_context); + add_buffer("p0", 37632, params_path, &mut function_context); + add_buffer("p1", 256, params_path, &mut function_context); + add_buffer("p2", 16384, params_path, &mut function_context); + add_buffer("p3", 256, params_path, &mut function_context); + add_buffer("p4", 589824, params_path, &mut function_context); + add_buffer("p5", 256, params_path, &mut function_context); + add_buffer("p6", 65536, params_path, &mut function_context); + add_buffer("p7", 1024, params_path, &mut function_context); + add_buffer("p8", 65536, params_path, &mut function_context); + add_buffer("p9", 1024, params_path, &mut function_context); + add_buffer("p10", 65536, params_path, &mut function_context); + add_buffer("p11", 256, params_path, &mut function_context); + add_buffer("p12", 589824, params_path, &mut function_context); + add_buffer("p13", 256, params_path, &mut function_context); + add_buffer("p14", 65536, params_path, &mut function_context); + add_buffer("p15", 1024, params_path, &mut function_context); + add_buffer("p16", 65536, params_path, &mut function_context); + add_buffer("p17", 256, params_path, &mut function_context); + add_buffer("p18", 589824, params_path, &mut function_context); + add_buffer("p19", 256, params_path, &mut function_context); + add_buffer("p20", 65536, params_path, &mut function_context); + add_buffer("p21", 1024, params_path, &mut function_context); + add_buffer("p22", 131072, params_path, &mut function_context); + add_buffer("p23", 512, params_path, &mut function_context); + add_buffer("p24", 589824, params_path, &mut function_context); + add_buffer("p25", 512, params_path, &mut function_context); + 
add_buffer("p26", 262144, params_path, &mut function_context); + add_buffer("p27", 2048, params_path, &mut function_context); + add_buffer("p28", 524288, params_path, &mut function_context); + add_buffer("p29", 2048, params_path, &mut function_context); + add_buffer("p30", 262144, params_path, &mut function_context); + add_buffer("p31", 512, params_path, &mut function_context); + add_buffer("p32", 1048576, params_path, &mut function_context); + add_buffer("p33", 512, params_path, &mut function_context); + add_buffer("p34", 262144, params_path, &mut function_context); + add_buffer("p35", 2048, params_path, &mut function_context); + add_buffer("p36", 262144, params_path, &mut function_context); + add_buffer("p37", 512, params_path, &mut function_context); + add_buffer("p38", 1048576, params_path, &mut function_context); + add_buffer("p39", 512, params_path, &mut function_context); + add_buffer("p40", 262144, params_path, &mut function_context); + add_buffer("p41", 2048, params_path, &mut function_context); + add_buffer("p42", 262144, params_path, &mut function_context); + add_buffer("p43", 512, params_path, &mut function_context); + add_buffer("p44", 1048576, params_path, &mut function_context); + add_buffer("p45", 512, params_path, &mut function_context); + add_buffer("p46", 262144, params_path, &mut function_context); + add_buffer("p47", 2048, params_path, &mut function_context); + add_buffer("p48", 262144, params_path, &mut function_context); + add_buffer("p49", 512, params_path, &mut function_context); + add_buffer("p50", 1048576, params_path, &mut function_context); + add_buffer("p51", 512, params_path, &mut function_context); + add_buffer("p52", 262144, params_path, &mut function_context); + add_buffer("p53", 2048, params_path, &mut function_context); + add_buffer("p54", 262144, params_path, &mut function_context); + add_buffer("p55", 512, params_path, &mut function_context); + add_buffer("p56", 1048576, params_path, &mut function_context); + add_buffer("p57", 
512, params_path, &mut function_context); + add_buffer("p58", 262144, params_path, &mut function_context); + add_buffer("p59", 2048, params_path, &mut function_context); + add_buffer("p60", 262144, params_path, &mut function_context); + add_buffer("p61", 512, params_path, &mut function_context); + add_buffer("p62", 1048576, params_path, &mut function_context); + add_buffer("p63", 512, params_path, &mut function_context); + add_buffer("p64", 262144, params_path, &mut function_context); + add_buffer("p65", 2048, params_path, &mut function_context); + add_buffer("p66", 262144, params_path, &mut function_context); + add_buffer("p67", 512, params_path, &mut function_context); + add_buffer("p68", 1048576, params_path, &mut function_context); + add_buffer("p69", 512, params_path, &mut function_context); + add_buffer("p70", 262144, params_path, &mut function_context); + add_buffer("p71", 2048, params_path, &mut function_context); + add_buffer("p72", 524288, params_path, &mut function_context); + add_buffer("p73", 1024, params_path, &mut function_context); + add_buffer("p74", 2359296, params_path, &mut function_context); + add_buffer("p75", 1024, params_path, &mut function_context); + add_buffer("p76", 1048576, params_path, &mut function_context); + add_buffer("p77", 4096, params_path, &mut function_context); + add_buffer("p78", 2097152, params_path, &mut function_context); + add_buffer("p79", 4096, params_path, &mut function_context); + add_buffer("p80", 1048576, params_path, &mut function_context); + add_buffer("p81", 1024, params_path, &mut function_context); + add_buffer("p82", 4194304, params_path, &mut function_context); + add_buffer("p83", 1024, params_path, &mut function_context); + add_buffer("p84", 1048576, params_path, &mut function_context); + add_buffer("p85", 4096, params_path, &mut function_context); + add_buffer("p86", 1048576, params_path, &mut function_context); + add_buffer("p87", 1024, params_path, &mut function_context); + add_buffer("p88", 4194304, 
params_path, &mut function_context); + add_buffer("p89", 1024, params_path, &mut function_context); + add_buffer("p90", 1048576, params_path, &mut function_context); + add_buffer("p91", 4096, params_path, &mut function_context); + add_buffer("p92", 1048576, params_path, &mut function_context); + add_buffer("p93", 1024, params_path, &mut function_context); + add_buffer("p94", 4194304, params_path, &mut function_context); + add_buffer("p95", 1024, params_path, &mut function_context); + add_buffer("p96", 1048576, params_path, &mut function_context); + add_buffer("p97", 4096, params_path, &mut function_context); + add_buffer("p98", 1048576, params_path, &mut function_context); + add_buffer("p99", 1024, params_path, &mut function_context); + add_buffer("p100", 4194304, params_path, &mut function_context); + add_buffer("p101", 1024, params_path, &mut function_context); + add_buffer("p102", 1048576, params_path, &mut function_context); + add_buffer("p103", 4096, params_path, &mut function_context); + add_buffer("p104", 1048576, params_path, &mut function_context); + add_buffer("p105", 1024, params_path, &mut function_context); + add_buffer("p106", 4194304, params_path, &mut function_context); + add_buffer("p107", 1024, params_path, &mut function_context); + add_buffer("p108", 1048576, params_path, &mut function_context); + add_buffer("p109", 4096, params_path, &mut function_context); + add_buffer("p110", 1048576, params_path, &mut function_context); + add_buffer("p111", 1024, params_path, &mut function_context); + add_buffer("p112", 4194304, params_path, &mut function_context); + add_buffer("p113", 1024, params_path, &mut function_context); + add_buffer("p114", 1048576, params_path, &mut function_context); + add_buffer("p115", 4096, params_path, &mut function_context); + add_buffer("p116", 1048576, params_path, &mut function_context); + add_buffer("p117", 1024, params_path, &mut function_context); + add_buffer("p118", 4194304, params_path, &mut function_context); + 
add_buffer("p119", 1024, params_path, &mut function_context); + add_buffer("p120", 1048576, params_path, &mut function_context); + add_buffer("p121", 4096, params_path, &mut function_context); + add_buffer("p122", 1048576, params_path, &mut function_context); + add_buffer("p123", 1024, params_path, &mut function_context); + add_buffer("p124", 4194304, params_path, &mut function_context); + add_buffer("p125", 1024, params_path, &mut function_context); + add_buffer("p126", 1048576, params_path, &mut function_context); + add_buffer("p127", 4096, params_path, &mut function_context); + add_buffer("p128", 1048576, params_path, &mut function_context); + add_buffer("p129", 1024, params_path, &mut function_context); + add_buffer("p130", 4194304, params_path, &mut function_context); + add_buffer("p131", 1024, params_path, &mut function_context); + add_buffer("p132", 1048576, params_path, &mut function_context); + add_buffer("p133", 4096, params_path, &mut function_context); + add_buffer("p134", 1048576, params_path, &mut function_context); + add_buffer("p135", 1024, params_path, &mut function_context); + add_buffer("p136", 4194304, params_path, &mut function_context); + add_buffer("p137", 1024, params_path, &mut function_context); + add_buffer("p138", 1048576, params_path, &mut function_context); + add_buffer("p139", 4096, params_path, &mut function_context); + add_buffer("p140", 1048576, params_path, &mut function_context); + add_buffer("p141", 1024, params_path, &mut function_context); + add_buffer("p142", 4194304, params_path, &mut function_context); + add_buffer("p143", 1024, params_path, &mut function_context); + add_buffer("p144", 1048576, params_path, &mut function_context); + add_buffer("p145", 4096, params_path, &mut function_context); + add_buffer("p146", 1048576, params_path, &mut function_context); + add_buffer("p147", 1024, params_path, &mut function_context); + add_buffer("p148", 4194304, params_path, &mut function_context); + add_buffer("p149", 1024, 
params_path, &mut function_context); + add_buffer("p150", 1048576, params_path, &mut function_context); + add_buffer("p151", 4096, params_path, &mut function_context); + add_buffer("p152", 1048576, params_path, &mut function_context); + add_buffer("p153", 1024, params_path, &mut function_context); + add_buffer("p154", 4194304, params_path, &mut function_context); + add_buffer("p155", 1024, params_path, &mut function_context); + add_buffer("p156", 1048576, params_path, &mut function_context); + add_buffer("p157", 4096, params_path, &mut function_context); + add_buffer("p158", 1048576, params_path, &mut function_context); + add_buffer("p159", 1024, params_path, &mut function_context); + add_buffer("p160", 4194304, params_path, &mut function_context); + add_buffer("p161", 1024, params_path, &mut function_context); + add_buffer("p162", 1048576, params_path, &mut function_context); + add_buffer("p163", 4096, params_path, &mut function_context); + add_buffer("p164", 1048576, params_path, &mut function_context); + add_buffer("p165", 1024, params_path, &mut function_context); + add_buffer("p166", 4194304, params_path, &mut function_context); + add_buffer("p167", 1024, params_path, &mut function_context); + add_buffer("p168", 1048576, params_path, &mut function_context); + add_buffer("p169", 4096, params_path, &mut function_context); + add_buffer("p170", 1048576, params_path, &mut function_context); + add_buffer("p171", 1024, params_path, &mut function_context); + add_buffer("p172", 4194304, params_path, &mut function_context); + add_buffer("p173", 1024, params_path, &mut function_context); + add_buffer("p174", 1048576, params_path, &mut function_context); + add_buffer("p175", 4096, params_path, &mut function_context); + add_buffer("p176", 1048576, params_path, &mut function_context); + add_buffer("p177", 1024, params_path, &mut function_context); + add_buffer("p178", 4194304, params_path, &mut function_context); + add_buffer("p179", 1024, params_path, &mut 
function_context); + add_buffer("p180", 1048576, params_path, &mut function_context); + add_buffer("p181", 4096, params_path, &mut function_context); + add_buffer("p182", 1048576, params_path, &mut function_context); + add_buffer("p183", 1024, params_path, &mut function_context); + add_buffer("p184", 4194304, params_path, &mut function_context); + add_buffer("p185", 1024, params_path, &mut function_context); + add_buffer("p186", 1048576, params_path, &mut function_context); + add_buffer("p187", 4096, params_path, &mut function_context); + add_buffer("p188", 1048576, params_path, &mut function_context); + add_buffer("p189", 1024, params_path, &mut function_context); + add_buffer("p190", 4194304, params_path, &mut function_context); + add_buffer("p191", 1024, params_path, &mut function_context); + add_buffer("p192", 1048576, params_path, &mut function_context); + add_buffer("p193", 4096, params_path, &mut function_context); + add_buffer("p194", 1048576, params_path, &mut function_context); + add_buffer("p195", 1024, params_path, &mut function_context); + add_buffer("p196", 4194304, params_path, &mut function_context); + add_buffer("p197", 1024, params_path, &mut function_context); + add_buffer("p198", 1048576, params_path, &mut function_context); + add_buffer("p199", 4096, params_path, &mut function_context); + add_buffer("p200", 1048576, params_path, &mut function_context); + add_buffer("p201", 1024, params_path, &mut function_context); + add_buffer("p202", 4194304, params_path, &mut function_context); + add_buffer("p203", 1024, params_path, &mut function_context); + add_buffer("p204", 1048576, params_path, &mut function_context); + add_buffer("p205", 4096, params_path, &mut function_context); + add_buffer("p206", 1048576, params_path, &mut function_context); + add_buffer("p207", 1024, params_path, &mut function_context); + add_buffer("p208", 4194304, params_path, &mut function_context); + add_buffer("p209", 1024, params_path, &mut function_context); + 
add_buffer("p210", 1048576, params_path, &mut function_context); + add_buffer("p211", 4096, params_path, &mut function_context); + add_buffer("p212", 1048576, params_path, &mut function_context); + add_buffer("p213", 1024, params_path, &mut function_context); + add_buffer("p214", 4194304, params_path, &mut function_context); + add_buffer("p215", 1024, params_path, &mut function_context); + add_buffer("p216", 1048576, params_path, &mut function_context); + add_buffer("p217", 4096, params_path, &mut function_context); + add_buffer("p218", 1048576, params_path, &mut function_context); + add_buffer("p219", 1024, params_path, &mut function_context); + add_buffer("p220", 4194304, params_path, &mut function_context); + add_buffer("p221", 1024, params_path, &mut function_context); + add_buffer("p222", 1048576, params_path, &mut function_context); + add_buffer("p223", 4096, params_path, &mut function_context); + add_buffer("p224", 1048576, params_path, &mut function_context); + add_buffer("p225", 1024, params_path, &mut function_context); + add_buffer("p226", 4194304, params_path, &mut function_context); + add_buffer("p227", 1024, params_path, &mut function_context); + add_buffer("p228", 1048576, params_path, &mut function_context); + add_buffer("p229", 4096, params_path, &mut function_context); + add_buffer("p230", 1048576, params_path, &mut function_context); + add_buffer("p231", 1024, params_path, &mut function_context); + add_buffer("p232", 4194304, params_path, &mut function_context); + add_buffer("p233", 1024, params_path, &mut function_context); + add_buffer("p234", 1048576, params_path, &mut function_context); + add_buffer("p235", 4096, params_path, &mut function_context); + add_buffer("p236", 1048576, params_path, &mut function_context); + add_buffer("p237", 1024, params_path, &mut function_context); + add_buffer("p238", 4194304, params_path, &mut function_context); + add_buffer("p239", 1024, params_path, &mut function_context); + add_buffer("p240", 1048576, 
params_path, &mut function_context); + add_buffer("p241", 4096, params_path, &mut function_context); + add_buffer("p242", 1048576, params_path, &mut function_context); + add_buffer("p243", 1024, params_path, &mut function_context); + add_buffer("p244", 4194304, params_path, &mut function_context); + add_buffer("p245", 1024, params_path, &mut function_context); + add_buffer("p246", 1048576, params_path, &mut function_context); + add_buffer("p247", 4096, params_path, &mut function_context); + add_buffer("p248", 1048576, params_path, &mut function_context); + add_buffer("p249", 1024, params_path, &mut function_context); + add_buffer("p250", 4194304, params_path, &mut function_context); + add_buffer("p251", 1024, params_path, &mut function_context); + add_buffer("p252", 1048576, params_path, &mut function_context); + add_buffer("p253", 4096, params_path, &mut function_context); + add_buffer("p254", 1048576, params_path, &mut function_context); + add_buffer("p255", 1024, params_path, &mut function_context); + add_buffer("p256", 4194304, params_path, &mut function_context); + add_buffer("p257", 1024, params_path, &mut function_context); + add_buffer("p258", 1048576, params_path, &mut function_context); + add_buffer("p259", 4096, params_path, &mut function_context); + add_buffer("p260", 1048576, params_path, &mut function_context); + add_buffer("p261", 1024, params_path, &mut function_context); + add_buffer("p262", 4194304, params_path, &mut function_context); + add_buffer("p263", 1024, params_path, &mut function_context); + add_buffer("p264", 1048576, params_path, &mut function_context); + add_buffer("p265", 4096, params_path, &mut function_context); + add_buffer("p266", 1048576, params_path, &mut function_context); + add_buffer("p267", 1024, params_path, &mut function_context); + add_buffer("p268", 4194304, params_path, &mut function_context); + add_buffer("p269", 1024, params_path, &mut function_context); + add_buffer("p270", 1048576, params_path, &mut 
function_context); + add_buffer("p271", 4096, params_path, &mut function_context); + add_buffer("p272", 1048576, params_path, &mut function_context); + add_buffer("p273", 1024, params_path, &mut function_context); + add_buffer("p274", 4194304, params_path, &mut function_context); + add_buffer("p275", 1024, params_path, &mut function_context); + add_buffer("p276", 1048576, params_path, &mut function_context); + add_buffer("p277", 4096, params_path, &mut function_context); + add_buffer("p278", 1048576, params_path, &mut function_context); + add_buffer("p279", 1024, params_path, &mut function_context); + add_buffer("p280", 4194304, params_path, &mut function_context); + add_buffer("p281", 1024, params_path, &mut function_context); + add_buffer("p282", 1048576, params_path, &mut function_context); + add_buffer("p283", 4096, params_path, &mut function_context); + add_buffer("p284", 1048576, params_path, &mut function_context); + add_buffer("p285", 1024, params_path, &mut function_context); + add_buffer("p286", 4194304, params_path, &mut function_context); + add_buffer("p287", 1024, params_path, &mut function_context); + add_buffer("p288", 1048576, params_path, &mut function_context); + add_buffer("p289", 4096, params_path, &mut function_context); + add_buffer("p290", 2097152, params_path, &mut function_context); + add_buffer("p291", 2048, params_path, &mut function_context); + add_buffer("p292", 9437184, params_path, &mut function_context); + add_buffer("p293", 2048, params_path, &mut function_context); + add_buffer("p294", 4194304, params_path, &mut function_context); + add_buffer("p295", 8192, params_path, &mut function_context); + add_buffer("p296", 8388608, params_path, &mut function_context); + add_buffer("p297", 8192, params_path, &mut function_context); + add_buffer("p298", 4194304, params_path, &mut function_context); + add_buffer("p299", 2048, params_path, &mut function_context); + add_buffer("p300", 16777216, params_path, &mut function_context); + 
add_buffer("p301", 2048, params_path, &mut function_context); + add_buffer("p302", 4194304, params_path, &mut function_context); + add_buffer("p303", 8192, params_path, &mut function_context); + add_buffer("p304", 4194304, params_path, &mut function_context); + add_buffer("p305", 2048, params_path, &mut function_context); + add_buffer("p306", 16777216, params_path, &mut function_context); + add_buffer("p307", 2048, params_path, &mut function_context); + add_buffer("p308", 4194304, params_path, &mut function_context); + add_buffer("p309", 8192, params_path, &mut function_context); + add_buffer("p310", 8192000, params_path, &mut function_context); + add_buffer("p311", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_mobilenetv2(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/mobilenetv2/"; + + add_buffer("input", 602112, params_path, &mut function_context); + add_buffer("p0", 3456, params_path, &mut function_context); + add_buffer("p1", 128, params_path, &mut function_context); + add_buffer("p2", 1152, params_path, &mut function_context); + add_buffer("p3", 128, params_path, &mut function_context); + add_buffer("p4", 2048, params_path, &mut function_context); + add_buffer("p5", 64, params_path, &mut function_context); + add_buffer("p6", 6144, params_path, &mut function_context); + add_buffer("p7", 384, params_path, &mut function_context); + add_buffer("p8", 3456, params_path, &mut function_context); + add_buffer("p9", 384, params_path, &mut function_context); + add_buffer("p10", 9216, params_path, &mut function_context); + add_buffer("p11", 96, params_path, &mut function_context); + add_buffer("p12", 13824, params_path, &mut 
function_context); + add_buffer("p13", 576, params_path, &mut function_context); + add_buffer("p14", 5184, params_path, &mut function_context); + add_buffer("p15", 576, params_path, &mut function_context); + add_buffer("p16", 13824, params_path, &mut function_context); + add_buffer("p17", 96, params_path, &mut function_context); + add_buffer("p18", 13824, params_path, &mut function_context); + add_buffer("p19", 576, params_path, &mut function_context); + add_buffer("p20", 5184, params_path, &mut function_context); + add_buffer("p21", 576, params_path, &mut function_context); + add_buffer("p22", 18432, params_path, &mut function_context); + add_buffer("p23", 128, params_path, &mut function_context); + add_buffer("p24", 24576, params_path, &mut function_context); + add_buffer("p25", 768, params_path, &mut function_context); + add_buffer("p26", 6912, params_path, &mut function_context); + add_buffer("p27", 768, params_path, &mut function_context); + add_buffer("p28", 24576, params_path, &mut function_context); + add_buffer("p29", 128, params_path, &mut function_context); + add_buffer("p30", 24576, params_path, &mut function_context); + add_buffer("p31", 768, params_path, &mut function_context); + add_buffer("p32", 6912, params_path, &mut function_context); + add_buffer("p33", 768, params_path, &mut function_context); + add_buffer("p34", 24576, params_path, &mut function_context); + add_buffer("p35", 128, params_path, &mut function_context); + add_buffer("p36", 24576, params_path, &mut function_context); + add_buffer("p37", 768, params_path, &mut function_context); + add_buffer("p38", 6912, params_path, &mut function_context); + add_buffer("p39", 768, params_path, &mut function_context); + add_buffer("p40", 49152, params_path, &mut function_context); + add_buffer("p41", 256, params_path, &mut function_context); + add_buffer("p42", 98304, params_path, &mut function_context); + add_buffer("p43", 1536, params_path, &mut function_context); + add_buffer("p44", 13824, 
params_path, &mut function_context); + add_buffer("p45", 1536, params_path, &mut function_context); + add_buffer("p46", 98304, params_path, &mut function_context); + add_buffer("p47", 256, params_path, &mut function_context); + add_buffer("p48", 98304, params_path, &mut function_context); + add_buffer("p49", 1536, params_path, &mut function_context); + add_buffer("p50", 13824, params_path, &mut function_context); + add_buffer("p51", 1536, params_path, &mut function_context); + add_buffer("p52", 98304, params_path, &mut function_context); + add_buffer("p53", 256, params_path, &mut function_context); + add_buffer("p54", 98304, params_path, &mut function_context); + add_buffer("p55", 1536, params_path, &mut function_context); + add_buffer("p56", 13824, params_path, &mut function_context); + add_buffer("p57", 1536, params_path, &mut function_context); + add_buffer("p58", 98304, params_path, &mut function_context); + add_buffer("p59", 256, params_path, &mut function_context); + add_buffer("p60", 98304, params_path, &mut function_context); + add_buffer("p61", 1536, params_path, &mut function_context); + add_buffer("p62", 13824, params_path, &mut function_context); + add_buffer("p63", 1536, params_path, &mut function_context); + add_buffer("p64", 147456, params_path, &mut function_context); + add_buffer("p65", 384, params_path, &mut function_context); + add_buffer("p66", 221184, params_path, &mut function_context); + add_buffer("p67", 2304, params_path, &mut function_context); + add_buffer("p68", 20736, params_path, &mut function_context); + add_buffer("p69", 2304, params_path, &mut function_context); + add_buffer("p70", 221184, params_path, &mut function_context); + add_buffer("p71", 384, params_path, &mut function_context); + add_buffer("p72", 221184, params_path, &mut function_context); + add_buffer("p73", 2304, params_path, &mut function_context); + add_buffer("p74", 20736, params_path, &mut function_context); + add_buffer("p75", 2304, params_path, &mut 
function_context); + add_buffer("p76", 221184, params_path, &mut function_context); + add_buffer("p77", 384, params_path, &mut function_context); + add_buffer("p78", 221184, params_path, &mut function_context); + add_buffer("p79", 2304, params_path, &mut function_context); + add_buffer("p80", 20736, params_path, &mut function_context); + add_buffer("p81", 2304, params_path, &mut function_context); + add_buffer("p82", 368640, params_path, &mut function_context); + add_buffer("p83", 640, params_path, &mut function_context); + add_buffer("p84", 614400, params_path, &mut function_context); + add_buffer("p85", 3840, params_path, &mut function_context); + add_buffer("p86", 34560, params_path, &mut function_context); + add_buffer("p87", 3840, params_path, &mut function_context); + add_buffer("p88", 614400, params_path, &mut function_context); + add_buffer("p89", 640, params_path, &mut function_context); + add_buffer("p90", 614400, params_path, &mut function_context); + add_buffer("p91", 3840, params_path, &mut function_context); + add_buffer("p92", 34560, params_path, &mut function_context); + add_buffer("p93", 3840, params_path, &mut function_context); + add_buffer("p94", 614400, params_path, &mut function_context); + add_buffer("p95", 640, params_path, &mut function_context); + add_buffer("p96", 614400, params_path, &mut function_context); + add_buffer("p97", 3840, params_path, &mut function_context); + add_buffer("p98", 34560, params_path, &mut function_context); + add_buffer("p99", 3840, params_path, &mut function_context); + add_buffer("p100", 1228800, params_path, &mut function_context); + add_buffer("p101", 1280, params_path, &mut function_context); + add_buffer("p102", 1638400, params_path, &mut function_context); + add_buffer("p103", 5120, params_path, &mut function_context); + add_buffer("p104", 5120000, params_path, &mut function_context); + add_buffer("p105", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: 
usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_simple(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/simple/"; + + add_buffer("test", 16, params_path, &mut function_context); + add_buffer("p0", 8, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 8; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_double_matmul(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/double_matmul/"; + + add_buffer("input", 16, params_path, &mut function_context); + add_buffer("p0", 80, params_path, &mut function_context); + add_buffer("p1", 60, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 12; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_resnet18(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18/"; + + add_buffer("input", 602112, params_path, &mut function_context); + add_buffer("p0", 37632, params_path, &mut function_context); + add_buffer("p1", 256, params_path, &mut function_context); + add_buffer("p2", 589824, params_path, &mut function_context); + add_buffer("p3", 256, params_path, &mut function_context); + add_buffer("p4", 589824, params_path, &mut function_context); + add_buffer("p5", 256, params_path, &mut function_context); + add_buffer("p6", 589824, params_path, &mut 
function_context); + add_buffer("p7", 256, params_path, &mut function_context); + add_buffer("p8", 589824, params_path, &mut function_context); + add_buffer("p9", 256, params_path, &mut function_context); + add_buffer("p10", 294912, params_path, &mut function_context); + add_buffer("p11", 512, params_path, &mut function_context); + add_buffer("p12", 1048576, params_path, &mut function_context); + add_buffer("p13", 512, params_path, &mut function_context); + add_buffer("p14", 32768, params_path, &mut function_context); + add_buffer("p15", 512, params_path, &mut function_context); + add_buffer("p16", 1048576, params_path, &mut function_context); + add_buffer("p17", 512, params_path, &mut function_context); + add_buffer("p18", 1048576, params_path, &mut function_context); + add_buffer("p19", 512, params_path, &mut function_context); + add_buffer("p20", 1179648, params_path, &mut function_context); + add_buffer("p21", 1024, params_path, &mut function_context); + add_buffer("p22", 4194304, params_path, &mut function_context); + add_buffer("p23", 1024, params_path, &mut function_context); + add_buffer("p24", 131072, params_path, &mut function_context); + add_buffer("p25", 1024, params_path, &mut function_context); + add_buffer("p26", 4194304, params_path, &mut function_context); + add_buffer("p27", 1024, params_path, &mut function_context); + add_buffer("p28", 4194304, params_path, &mut function_context); + add_buffer("p29", 1024, params_path, &mut function_context); + add_buffer("p30", 4718592, params_path, &mut function_context); + add_buffer("p31", 2048, params_path, &mut function_context); + add_buffer("p32", 16777216, params_path, &mut function_context); + add_buffer("p33", 2048, params_path, &mut function_context); + add_buffer("p34", 524288, params_path, &mut function_context); + add_buffer("p35", 2048, params_path, &mut function_context); + add_buffer("p36", 16777216, params_path, &mut function_context); + add_buffer("p37", 2048, params_path, &mut 
function_context); + add_buffer("p38", 16777216, params_path, &mut function_context); + add_buffer("p39", 2048, params_path, &mut function_context); + add_buffer("p40", 2048000, params_path, &mut function_context); + add_buffer("p41", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_resnet18_batch2(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18_batch2/"; + + add_buffer("input", 1204224, params_path, &mut function_context); + add_buffer("p0", 37632, params_path, &mut function_context); + add_buffer("p1", 256, params_path, &mut function_context); + add_buffer("p2", 147456, params_path, &mut function_context); + add_buffer("p3", 256, params_path, &mut function_context); + add_buffer("p4", 147456, params_path, &mut function_context); + add_buffer("p5", 256, params_path, &mut function_context); + add_buffer("p6", 147456, params_path, &mut function_context); + add_buffer("p7", 256, params_path, &mut function_context); + add_buffer("p8", 147456, params_path, &mut function_context); + add_buffer("p9", 256, params_path, &mut function_context); + add_buffer("p10", 294912, params_path, &mut function_context); + add_buffer("p11", 512, params_path, &mut function_context); + add_buffer("p12", 589824, params_path, &mut function_context); + add_buffer("p13", 512, params_path, &mut function_context); + add_buffer("p14", 32768, params_path, &mut function_context); + add_buffer("p15", 512, params_path, &mut function_context); + add_buffer("p16", 589824, params_path, &mut function_context); + add_buffer("p17", 512, params_path, &mut function_context); + add_buffer("p18", 589824, params_path, &mut function_context); + add_buffer("p19", 512, 
params_path, &mut function_context); + add_buffer("p20", 1179648, params_path, &mut function_context); + add_buffer("p21", 1024, params_path, &mut function_context); + add_buffer("p22", 2359296, params_path, &mut function_context); + add_buffer("p23", 1024, params_path, &mut function_context); + add_buffer("p24", 131072, params_path, &mut function_context); + add_buffer("p25", 1024, params_path, &mut function_context); + add_buffer("p26", 2359296, params_path, &mut function_context); + add_buffer("p27", 1024, params_path, &mut function_context); + add_buffer("p28", 2359296, params_path, &mut function_context); + add_buffer("p29", 1024, params_path, &mut function_context); + add_buffer("p30", 4718592, params_path, &mut function_context); + add_buffer("p31", 2048, params_path, &mut function_context); + add_buffer("p32", 9437184, params_path, &mut function_context); + add_buffer("p33", 2048, params_path, &mut function_context); + add_buffer("p34", 524288, params_path, &mut function_context); + add_buffer("p35", 2048, params_path, &mut function_context); + add_buffer("p36", 9437184, params_path, &mut function_context); + add_buffer("p37", 2048, params_path, &mut function_context); + add_buffer("p38", 9437184, params_path, &mut function_context); + add_buffer("p39", 2048, params_path, &mut function_context); + add_buffer("p40", 2048000, params_path, &mut function_context); + add_buffer("p41", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 8000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_resnet18_batch4(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18_batch4/"; + + add_buffer("input", 2408448, params_path, &mut function_context); + add_buffer("p0", 37632, params_path, &mut 
function_context); + add_buffer("p1", 256, params_path, &mut function_context); + add_buffer("p2", 147456, params_path, &mut function_context); + add_buffer("p3", 256, params_path, &mut function_context); + add_buffer("p4", 147456, params_path, &mut function_context); + add_buffer("p5", 256, params_path, &mut function_context); + add_buffer("p6", 147456, params_path, &mut function_context); + add_buffer("p7", 256, params_path, &mut function_context); + add_buffer("p8", 147456, params_path, &mut function_context); + add_buffer("p9", 256, params_path, &mut function_context); + add_buffer("p10", 294912, params_path, &mut function_context); + add_buffer("p11", 512, params_path, &mut function_context); + add_buffer("p12", 589824, params_path, &mut function_context); + add_buffer("p13", 512, params_path, &mut function_context); + add_buffer("p14", 32768, params_path, &mut function_context); + add_buffer("p15", 512, params_path, &mut function_context); + add_buffer("p16", 589824, params_path, &mut function_context); + add_buffer("p17", 512, params_path, &mut function_context); + add_buffer("p18", 589824, params_path, &mut function_context); + add_buffer("p19", 512, params_path, &mut function_context); + add_buffer("p20", 1179648, params_path, &mut function_context); + add_buffer("p21", 1024, params_path, &mut function_context); + add_buffer("p22", 2359296, params_path, &mut function_context); + add_buffer("p23", 1024, params_path, &mut function_context); + add_buffer("p24", 131072, params_path, &mut function_context); + add_buffer("p25", 1024, params_path, &mut function_context); + add_buffer("p26", 2359296, params_path, &mut function_context); + add_buffer("p27", 1024, params_path, &mut function_context); + add_buffer("p28", 2359296, params_path, &mut function_context); + add_buffer("p29", 1024, params_path, &mut function_context); + add_buffer("p30", 4718592, params_path, &mut function_context); + add_buffer("p31", 2048, params_path, &mut function_context); + 
add_buffer("p32", 9437184, params_path, &mut function_context); + add_buffer("p33", 2048, params_path, &mut function_context); + add_buffer("p34", 524288, params_path, &mut function_context); + add_buffer("p35", 2048, params_path, &mut function_context); + add_buffer("p36", 9437184, params_path, &mut function_context); + add_buffer("p37", 2048, params_path, &mut function_context); + add_buffer("p38", 9437184, params_path, &mut function_context); + add_buffer("p39", 2048, params_path, &mut function_context); + add_buffer("p40", 2048000, params_path, &mut function_context); + add_buffer("p41", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 16000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + output_name.to_string(), + expected, + function_context, + ) +} + +pub fn load_resnet18_batch8(mut function_context: Context) -> (usize, String, Vec, Context) { + let params_path = "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18_batch8/"; + + add_buffer("input", 4816896, params_path, &mut function_context); + add_buffer("p0", 37632, params_path, &mut function_context); + add_buffer("p1", 256, params_path, &mut function_context); + add_buffer("p2", 147456, params_path, &mut function_context); + add_buffer("p3", 256, params_path, &mut function_context); + add_buffer("p4", 147456, params_path, &mut function_context); + add_buffer("p5", 256, params_path, &mut function_context); + add_buffer("p6", 147456, params_path, &mut function_context); + add_buffer("p7", 256, params_path, &mut function_context); + add_buffer("p8", 147456, params_path, &mut function_context); + add_buffer("p9", 256, params_path, &mut function_context); + add_buffer("p10", 294912, params_path, &mut function_context); + add_buffer("p11", 512, params_path, &mut function_context); + add_buffer("p12", 589824, params_path, &mut function_context); + add_buffer("p13", 512, params_path, 
&mut function_context); + add_buffer("p14", 32768, params_path, &mut function_context); + add_buffer("p15", 512, params_path, &mut function_context); + add_buffer("p16", 589824, params_path, &mut function_context); + add_buffer("p17", 512, params_path, &mut function_context); + add_buffer("p18", 589824, params_path, &mut function_context); + add_buffer("p19", 512, params_path, &mut function_context); + add_buffer("p20", 1179648, params_path, &mut function_context); + add_buffer("p21", 1024, params_path, &mut function_context); + add_buffer("p22", 2359296, params_path, &mut function_context); + add_buffer("p23", 1024, params_path, &mut function_context); + add_buffer("p24", 131072, params_path, &mut function_context); + add_buffer("p25", 1024, params_path, &mut function_context); + add_buffer("p26", 2359296, params_path, &mut function_context); + add_buffer("p27", 1024, params_path, &mut function_context); + add_buffer("p28", 2359296, params_path, &mut function_context); + add_buffer("p29", 1024, params_path, &mut function_context); + add_buffer("p30", 4718592, params_path, &mut function_context); + add_buffer("p31", 2048, params_path, &mut function_context); + add_buffer("p32", 9437184, params_path, &mut function_context); + add_buffer("p33", 2048, params_path, &mut function_context); + add_buffer("p34", 524288, params_path, &mut function_context); + add_buffer("p35", 2048, params_path, &mut function_context); + add_buffer("p36", 9437184, params_path, &mut function_context); + add_buffer("p37", 2048, params_path, &mut function_context); + add_buffer("p38", 9437184, params_path, &mut function_context); + add_buffer("p39", 2048, params_path, &mut function_context); + add_buffer("p40", 2048000, params_path, &mut function_context); + add_buffer("p41", 4000, params_path, &mut function_context); + + let output_name: &str = "output"; + let output_size: usize = 32000; + let expected: Vec = read_tensor_from_file(output_name, params_path).unwrap(); + + ( + output_size, + 
output_name.to_string(),
+        expected,
+        function_context,
+    )
+}
+
+/// Names and byte sizes of the ResNet-18 parameter buffers `p0`..`p41`, in load order.
+///
+/// The table is identical for every batch size handled below; only the size of the
+/// `input` buffer and of the expected output change with the batch size.
+const RESNET18_BATCHED_PARAMS: [(&str, usize); 42] = [
+    ("p0", 37632),
+    ("p1", 256),
+    ("p2", 147456),
+    ("p3", 256),
+    ("p4", 147456),
+    ("p5", 256),
+    ("p6", 147456),
+    ("p7", 256),
+    ("p8", 147456),
+    ("p9", 256),
+    ("p10", 294912),
+    ("p11", 512),
+    ("p12", 589824),
+    ("p13", 512),
+    ("p14", 32768),
+    ("p15", 512),
+    ("p16", 589824),
+    ("p17", 512),
+    ("p18", 589824),
+    ("p19", 512),
+    ("p20", 1179648),
+    ("p21", 1024),
+    ("p22", 2359296),
+    ("p23", 1024),
+    ("p24", 131072),
+    ("p25", 1024),
+    ("p26", 2359296),
+    ("p27", 1024),
+    ("p28", 2359296),
+    ("p29", 1024),
+    ("p30", 4718592),
+    ("p31", 2048),
+    ("p32", 9437184),
+    ("p33", 2048),
+    ("p34", 524288),
+    ("p35", 2048),
+    ("p36", 9437184),
+    ("p37", 2048),
+    ("p38", 9437184),
+    ("p39", 2048),
+    ("p40", 2048000),
+    ("p41", 4000),
+];
+
+/// Shared implementation of the batched ResNet-18 loaders.
+///
+/// Adds the `input` buffer (`input_size` bytes) followed by every entry of
+/// [`RESNET18_BATCHED_PARAMS`] to `function_context`, reading the data from
+/// `params_path`, then loads the reference tensor named `output` from the same
+/// directory.
+///
+/// Returns `(output_size, output_name, expected, function_context)`.
+///
+/// # Panics
+/// Panics if the reference output tensor cannot be read.
+// NOTE(review): the `Vec` element type was missing in the reviewed text; `f32` is
+// assumed here and in the wrappers below - confirm against `read_tensor_from_file`.
+fn load_resnet18_batched(
+    mut function_context: Context,
+    params_path: &str,
+    input_size: usize,
+    output_size: usize,
+) -> (usize, String, Vec<f32>, Context) {
+    add_buffer("input", input_size, params_path, &mut function_context);
+    for &(name, size) in RESNET18_BATCHED_PARAMS.iter() {
+        add_buffer(name, size, params_path, &mut function_context);
+    }
+
+    let output_name: &str = "output";
+    let expected: Vec<f32> = read_tensor_from_file(output_name, params_path)
+        .expect("Should be able to read the expected output tensor");
+
+    (
+        output_size,
+        output_name.to_string(),
+        expected,
+        function_context,
+    )
+}
+
+// NOTE(review): the parameter directories below are absolute paths on one specific
+// machine; the loaders panic anywhere else. Consider making the base directory
+// configurable.
+
+/// Loads input, weights and expected output of ResNet-18 with batch size 16.
+///
+/// Returns `(output_size, output_name, expected, function_context)`.
+pub fn load_resnet18_batch16(function_context: Context) -> (usize, String, Vec<f32>, Context) {
+    load_resnet18_batched(
+        function_context,
+        "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18_batch16/",
+        9633792,
+        64000,
+    )
+}
+
+/// Loads input, weights and expected output of ResNet-18 with batch size 32.
+///
+/// Returns `(output_size, output_name, expected, function_context)`.
+pub fn load_resnet18_batch32(function_context: Context) -> (usize, String, Vec<f32>, Context) {
+    load_resnet18_batched(
+        function_context,
+        "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18_batch32/",
+        19267584,
+        128000,
+    )
+}
+
+/// Loads input, weights and expected output of ResNet-18 with batch size 64.
+///
+/// Returns `(output_size, output_name, expected, function_context)`.
+pub fn load_resnet18_batch64(function_context: Context) -> (usize, String, Vec<f32>, Context) {
+    load_resnet18_batched(
+        function_context,
+        "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18_batch64/",
+        38535168,
+        256000,
+    )
+}
+
+/// Loads input, weights and expected output of ResNet-18 with batch size 3.
+///
+/// Returns `(output_size, output_name, expected, function_context)`.
+pub fn load_resnet18_batch3(function_context: Context) -> (usize, String, Vec<f32>, Context) {
+    load_resnet18_batched(
+        function_context,
+        "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18_batch3/",
+        1806336,
+        12000,
+    )
+}
+
+/// Loads input, weights and expected output of ResNet-18 with batch size 5.
+///
+/// Returns `(output_size, output_name, expected, function_context)`.
+pub fn load_resnet18_batch5(function_context: Context) -> (usize, String, Vec<f32>, Context) {
+    load_resnet18_batched(
+        function_context,
+        "/pub/scratch/alrusso/TVM-compilation/model_parameters/resnet18_batch5/",
+        3010560,
+        20000,
+    )
+}
diff
--git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests.rs
new file mode 100644
index 00000000..4c585c81
--- /dev/null
+++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests.rs
@@ -0,0 +1,623 @@
+mod load_utils;
+mod tests_utils;
+
+use crate::{
+    function_driver::{
+        compute_driver::{
+            compute_driver_tests::compute_driver_tests::{
+                engine_minimal, get_expected_mat, prepare_engine_and_function,
+            },
+            gpu::gpu_tests::{get_driver, Arc, Archive, ArchiveInit, RecordPoint, GPU_LOCK},
+        },
+        ComputeResource, Driver, WorkToDo,
+    },
+    memory_domain::{gpu::GpuMemoryDomain, ContextTrait, MemoryResource},
+    DataItem, DataSet, Position,
+};
+
+/// Builds an unnamed data item covering `size` bytes at `offset` in the context.
+fn unnamed_item(offset: usize, size: usize) -> DataItem {
+    DataItem {
+        ident: "".to_string(),
+        data: Position { offset, size },
+        key: 0,
+    }
+}
+
+/// Wraps `buffers` into a set called `ident`, ready to be pushed onto `content`.
+fn input_set(ident: &str, buffers: Vec<DataItem>) -> Option<DataSet> {
+    Some(DataSet {
+        ident: ident.to_string(),
+        buffers,
+    })
+}
+
+/// Submits one function invocation followed by a shutdown request to `$queue`,
+/// blocks until the invocation has finished and evaluates to the result context.
+///
+/// `$output_set` is the name of the single output set that is requested.
+/// A macro is used instead of a function so that the config and queue types,
+/// which this file does not import, need not be named.
+macro_rules! run_function {
+    ($queue:ident, $config:expr, $context:expr, $output_set:expr) => {{
+        let archive = Box::leak(Box::new(Archive::init(ArchiveInit {
+            #[cfg(feature = "timestamp")]
+            timestamp_count: 1000,
+        })));
+
+        let mut recorder = archive.get_recorder().unwrap();
+        recorder
+            .record(RecordPoint::TransferEnd)
+            .expect("Should have properly initialized recorder state");
+        let promise = $queue.enqueu(WorkToDo::FunctionArguments {
+            config: $config,
+            context: $context,
+            output_sets: Arc::new(vec![String::from($output_set)]),
+            recorder: recorder.get_sub_recorder().unwrap(),
+        });
+        $queue.enqueu(WorkToDo::Shutdown());
+        let result_context = tokio::runtime::Builder::new_current_thread()
+            .build()
+            .unwrap()
+            .block_on(promise)
+            .expect("Engine should run ok with basic function")
+            .get_context();
+        recorder
+            .record(RecordPoint::FutureReturn)
+            .expect("Should have properly advanced recorder state");
+        result_context
+    }};
+}
+
+/// Runs the shared minimal engine test against the GPU driver.
+#[test]
+fn minimal() {
+    let lock = GPU_LOCK.lock().unwrap();
+    let driver: Box<dyn Driver> = get_driver();
+    engine_minimal::<GpuMemoryDomain>(
+        &format!(
+            "{}/tests/data/hip/test_gpu_minimal.json",
+            env!("CARGO_MANIFEST_DIR")
+        ),
+        MemoryResource::None,
+        driver,
+        vec![ComputeResource::GPU(7, 1, 2)],
+    );
+    drop(lock);
+}
+
+/// Passes a single `i64` in through set `A` and expects `98765` back in set `A`.
+#[test]
+fn basic_input_output() {
+    let lock = GPU_LOCK.lock().unwrap();
+    let driver: Box<dyn Driver> = get_driver();
+    let (mut function_context, config, queue) = prepare_engine_and_function::<GpuMemoryDomain>(
+        &format!(
+            "{}/tests/data/hip/test_gpu_basic_io.json",
+            env!("CARGO_MANIFEST_DIR")
+        ),
+        MemoryResource::None,
+        &driver,
+        vec![ComputeResource::GPU(7, 0, 2)],
+    );
+    // add inputs
+    let in_size_offset = function_context
+        .get_free_space_and_write_slice(&[12345i64])
+        .expect("Should have space for single i64");
+    function_context
+        .content
+        .push(input_set("A", vec![unnamed_item(in_size_offset as usize, 8)]));
+
+    let result_context = run_function!(queue, config, function_context, "A");
+
+    assert_eq!(1, result_context.content.len());
+    let output_item = result_context.content[0]
+        .as_ref()
+        .expect("Set should be present");
+    eprintln!("{:?}", output_item);
+    assert_eq!(1, output_item.buffers.len());
+    let position = output_item.buffers[0].data;
+    assert_eq!(8, position.size, "Checking for size of output");
+    let mut read_buffer = vec![0i64; position.size / 8];
+    result_context
+        .context
+        .read(position.offset, &mut read_buffer)
+        .expect("Should succeed in reading");
+    assert_eq!(98765, read_buffer[0]);
+    drop(lock);
+}
+
+/// Multiplies a 3x3 matrix on the GPU and compares against `get_expected_mat(3)`.
+#[test]
+fn engine_matmul_3x3_loop() {
+    let lock = GPU_LOCK.lock().unwrap();
+    let filename = &format!(
+        "{}/tests/data/hip/test_gpu_matmul_loop.json",
+        env!("CARGO_MANIFEST_DIR")
+    );
+    let dom_init = MemoryResource::None;
+    let driver: Box<dyn Driver> = get_driver();
+    let drv_init = vec![ComputeResource::GPU(7, 0, 2)];
+    let (mut function_context, config, queue) =
+        prepare_engine_and_function::<GpuMemoryDomain>(filename, dom_init, &driver, drv_init);
+    // add inputs, split over two buffers to test concatenating them in GPU memory
+    let in_size_offset = function_context
+        .get_free_space_and_write_slice(&[3i64])
+        .expect("Should have space");
+    let offset2 = function_context
+        .get_free_space_and_write_slice(&[0i64, 1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64, 8i64])
+        .expect("Should have space");
+    function_context.content.push(input_set(
+        "A",
+        vec![
+            unnamed_item(in_size_offset as usize, 8),
+            unnamed_item(offset2 as usize, 72),
+        ],
+    ));
+
+    let result_context = run_function!(queue, config, function_context, "B");
+
+    assert_eq!(1, result_context.content.len());
+    let output_item = result_context.content[0]
+        .as_ref()
+        .expect("Set should be present");
+    assert_eq!(1, output_item.buffers.len());
+    let position = output_item.buffers[0].data;
+    assert_eq!(80, position.size, "Checking for size of output");
+    let mut read_buffer = vec![0i64; position.size / 8];
+    result_context
+        .context
+        .read(position.offset, &mut read_buffer)
+        .expect("Should succeed in reading");
+    // the first element carries the matrix dimension, the product follows
+    assert_eq!(3i64, read_buffer[0]);
+    let expected = self::get_expected_mat(3);
+    for (should, is) in expected.iter().zip(read_buffer[1..].iter()) {
+        assert_eq!(should, is);
+    }
+    drop(lock);
+}
+
+/// Repeats the matrix multiplication for every size in
+/// `LOWER_SIZE_BOUND..UPPER_SIZE_BOUND`, using a fresh engine per size.
+#[test]
+fn engine_matmul_size_sweep_parallel() {
+    let lock = GPU_LOCK.lock().unwrap();
+    let filename = &format!(
+        "{}/tests/data/hip/test_gpu_matmul_para.json",
+        env!("CARGO_MANIFEST_DIR")
+    );
+    let dom_init = MemoryResource::None;
+    let driver: Box<dyn Driver> = get_driver();
+    let drv_init = vec![ComputeResource::GPU(7, 0, 2)];
+    const LOWER_SIZE_BOUND: usize = 2;
+    const UPPER_SIZE_BOUND: usize = 16;
+    for mat_size in LOWER_SIZE_BOUND..UPPER_SIZE_BOUND {
+        let (mut function_context, config, queue) = prepare_engine_and_function::<GpuMemoryDomain>(
+            filename,
+            dom_init,
+            &driver,
+            drv_init.clone(),
+        );
+        // add inputs, split over two buffers to test concatenating them in GPU memory
+        let in_size_offset = function_context
+            .get_free_space_and_write_slice(&[mat_size as i64])
+            .expect("Should have space");
+
+        let mat_vec: Vec<i64> = (0..(mat_size * mat_size)).map(|i| i as i64).collect();
+        let in_mat_offset = function_context
+            .get_free_space_and_write_slice(&mat_vec)
+            .expect("Should have space") as usize;
+        function_context.content.push(input_set(
+            "A",
+            vec![
+                unnamed_item(in_size_offset as usize, 8),
+                unnamed_item(in_mat_offset, mat_vec.len() * 8),
+            ],
+        ));
+        // single config value: the matrix size rounded up to whole blocks of 32
+        let cfg_offset = function_context
+            .get_free_space_and_write_slice(&[(mat_size + 31) / 32])
+            .expect("Should be able to write cfg");
+        function_context
+            .content
+            .push(input_set("cfg", vec![unnamed_item(cfg_offset as usize, 8)]));
+
+        let result_context = run_function!(queue, config, function_context, "B");
+
+        assert_eq!(1, result_context.content.len());
+        let output_item = &result_context.content[0]
+            .as_ref()
+            .expect("Set should be present");
+        assert_eq!(1, output_item.buffers.len());
+        let position = output_item.buffers[0].data;
+        assert_eq!(
+            (mat_size * mat_size + 1) * 8,
+            position.size,
+            "Checking for size of output"
+        );
+        let mut output = vec![0i64; position.size / 8];
+        result_context
+            .context
+            .read(position.offset, &mut output)
+            .expect("Should succeed in reading");
+        let expected = self::get_expected_mat(mat_size);
+        assert_eq!(mat_size as i64, output[0]);
+        for (should, is) in expected.iter().zip(output[1..].iter()) {
+            assert_eq!(should, is);
+        }
+    }
+    drop(lock);
+}
+
+/// Returns the inputs of the inference benchmark: a 224x224 matrix `a` and a
+/// 5x5 filter `b`. Each vector starts with its two dimensions, followed by the
+/// values `0, 1, 2, ...` in row-major order.
+fn get_inference_inputs() -> (Vec<f32>, Vec<f32>) {
+    let mut a: Vec<f32> = Vec::with_capacity(224 * 224 + 2);
+    let mut b: Vec<f32> = Vec::with_capacity(5 * 5 + 2);
+
+    a.push(224.0);
+    a.push(224.0);
+    a.extend((0..224 * 224).map(|i| i as f32));
+
+    b.push(5.0);
+    b.push(5.0);
+    b.extend((0..5 * 5).map(|i| i as f32));
+
+    (a, b)
+}
+
+/// Computes the reference result of the inference benchmark on the CPU:
+/// 5x5 convolution of the 224x224 input (out-of-range taps are skipped),
+/// ReLU, then 2x2 max pooling.
+///
+/// The returned vector holds the two dimensions (112, 112) followed by the
+/// 112 * 112 pooled values, i.e. exactly `2 + 112 * 112` elements.
+fn get_expected_inference_output() -> Vec<f32> {
+    let (a, b) = get_inference_inputs();
+    let mut c = vec![0f32; a.len()];
+    // two header entries plus one value per pooled cell; the previous
+    // `c.len() / 2 + 1` allocated roughly twice as many elements as are filled
+    let mut d = vec![0f32; 2 + 112 * 112];
+
+    // convolution
+    c[0] = 224f32;
+    c[1] = 224f32;
+    for i in 0..224 {
+        for j in 0..224 {
+            let mut sum = 0f32;
+            for k in -2..=2i32 {
+                for l in -2..=2i32 {
+                    let cur_row = i - k;
+                    let cur_col = j - l;
+
+                    let filter_row = 2 + k;
+                    let filter_col = 2 + l;
+
+                    if (0..224).contains(&cur_row) && (0..224).contains(&cur_col) {
+                        // indices are known to be non-negative from here on
+                        let cur_row = cur_row as usize;
+                        let cur_col = cur_col as usize;
+                        let filter_row = filter_row as usize;
+                        let filter_col = filter_col as usize;
+                        sum += a[2 + cur_row * 224 + cur_col] * b[2 + filter_row * 5 + filter_col];
+                    }
+                }
+            }
+            let i = i as usize;
+            let j = j as usize;
+            c[2 + i * 224 + j] = sum;
+        }
+    }
+
+    // relu
+    for value in c[2..].iter_mut() {
+        *value = value.max(0f32);
+    }
+
+    // max pooling
+    d[0] = 112f32;
+    d[1] = 112f32;
+    for i in 0..112 {
+        for j in 0..112 {
+            let mut max = f32::NEG_INFINITY;
+            let base_row = i * 2;
+            let base_col = j * 2;
+            for k in 0..2 {
+                for l in 0..2 {
+                    max = max.max(c[2 + (base_row + k) * 224 + (base_col + l)]);
+                }
+            }
+            d[2 + i * 112 + j] = max;
+        }
+    }
+
+    d
+}
+
+/// Runs convolution + ReLU + max pooling on the GPU and compares every output
+/// element against the CPU reference from `get_expected_inference_output`.
+#[test]
+fn inference_benchmark_function() {
+    let lock = GPU_LOCK.lock().unwrap();
+    let filename = &format!(
+        "{}/tests/data/hip/test_gpu_inference.json",
+        env!("CARGO_MANIFEST_DIR")
+    );
+    let dom_init = MemoryResource::None;
+    let driver: Box<dyn Driver> = get_driver();
+    let drv_init = vec![ComputeResource::GPU(7, 0, 2)];
+    let (mut function_context, config, queue) =
+        prepare_engine_and_function::<GpuMemoryDomain>(filename, dom_init, &driver, drv_init);
+    let d_size: usize = 112 * 112 * 4 + 8; //side_len * side_len * sizeof(float) + [[size convention at start]]
+    let a_grid_dim: usize = (224 + 31) / 32;
+    let d_grid_dim: usize = (112 + 31) / 32;
+    let (a_matrix, b_matrix) = get_inference_inputs();
+    // NOTE(review): four values are written here but the `cfg` item below only
+    // covers `3 * 8` bytes, so the trailing `500` is never part of the set -
+    // confirm whether the value is stale or the size should be `4 * 8`.
+    let cfg_offset = function_context
+        .get_free_space_and_write_slice(&[d_size, a_grid_dim, d_grid_dim, 500])
+        .expect("Should have space for cfg");
+    let a_offset = function_context
+        .get_free_space_and_write_slice(&a_matrix)
+        .expect("Should have space for A");
+    let b_offset = function_context
+        .get_free_space_and_write_slice(&b_matrix)
+        .expect("Should have space for B");
+
+    function_context.content.append(&mut vec![
+        input_set("cfg", vec![unnamed_item(cfg_offset as usize, 3 * 8)]),
+        input_set("A", vec![unnamed_item(a_offset as usize, a_matrix.len() * 4)]),
+        input_set("B", vec![unnamed_item(b_offset as usize, b_matrix.len() * 4)]),
+    ]);
+
+    let result_context = run_function!(queue, config, function_context, "D");
+
+    assert_eq!(1, result_context.content.len());
+    let output_item = result_context.content[0]
+        .as_ref()
+        .expect("Set should be present");
+    assert_eq!(1, output_item.buffers.len());
+    let position = output_item.buffers[0].data;
+    assert_eq!(d_size, position.size, "Checking for size of output");
+    let mut read_buffer = vec![0f32; position.size / 4];
+    result_context
+        .context
+        .read(position.offset, &mut read_buffer)
+        .expect("Should succeed in reading");
+    let expected = get_expected_inference_output();
+    // `zip` stops at the shorter side, so make sure nothing is skipped silently
+    assert_eq!(
+        expected.len(),
+        read_buffer.len(),
+        "Checking number of output elements"
+    );
+    for (idx, (should, is)) in expected.iter().zip(read_buffer[..].iter()).enumerate() {
+        assert_eq!(should, is, "Comparing args at {}", idx);
+    }
+    drop(lock);
+}
+
+/// Enqueues the hello-world function followed by a shutdown request.
+///
+/// The returned promise is intentionally not awaited and nothing is asserted;
+/// the test only checks that preparing and enqueueing does not panic.
+#[test]
+fn hello_world() {
+    let lock = GPU_LOCK.lock().unwrap();
+    let driver: Box<dyn Driver> = get_driver();
+    let (function_context, config, queue) = prepare_engine_and_function::<GpuMemoryDomain>(
+        &format!(
+            "{}/tests/data/hip/test_gpu_hello_world.json",
+            env!("CARGO_MANIFEST_DIR")
+        ),
+        MemoryResource::None,
+        &driver,
+        vec![ComputeResource::GPU(7, 0, 2)],
+    );
+
+    let archive = Box::leak(Box::new(Archive::init(ArchiveInit {
+        #[cfg(feature = "timestamp")]
+        timestamp_count: 1000,
+    })));
+
+    let mut recorder = archive.get_recorder().unwrap();
+    recorder
+        .record(RecordPoint::TransferEnd)
+        .expect("Should have properly initialized recorder state");
+    // kept alive until the end of the test, but never polled
+    let _promise = queue.enqueu(WorkToDo::FunctionArguments {
+        config,
+        context: function_context,
+        output_sets: Arc::new(vec!["A".into()]),
+        recorder: recorder.get_sub_recorder().unwrap(),
+    });
+    queue.enqueu(WorkToDo::Shutdown());
+    drop(lock);
+}
+
+/// Multiplies a 4x2 input `A` with 2x5 weights `W` on the GPU and checks all
+/// 20 elements of the resulting 4x5 matrix `B`.
+///
+/// Run with `RUST_BACKTRACE=1` in the environment to get backtraces; the test
+/// no longer sets the variable itself because changing the process environment
+/// from a multi-threaded test harness is not safe.
+#[test]
+fn engine_first_double_matmul() {
+    let lock = GPU_LOCK.lock().unwrap();
+    let filename = &format!(
+        "{}/tests/data/hip/test_gpu_first_double_matmul.json",
+        env!("CARGO_MANIFEST_DIR")
+    );
+    let dom_init = MemoryResource::None;
+    let driver: Box<dyn Driver> = get_driver();
+    let drv_init = vec![ComputeResource::GPU(7, 0, 2)];
+    let (mut function_context, config, queue) =
+        prepare_engine_and_function::<GpuMemoryDomain>(filename, dom_init, &driver, drv_init);
+
+    // sizes in bytes: rows * columns * (32 bit / 8)
+    let a_size: usize = 4 * 2 * (32 / 8);
+    let weights_size = 2 * 5 * (32 / 8);
+    let b_size = 4 * 5 * (32 / 8);
+
+    let a_offset = function_context
+        .get_free_space_and_write_slice(&[
+            1.0f32, 2.0f32, 3.0f32, 4.0f32, 5.0f32, 6.0f32, 7.0f32, 8.0f32,
+        ])
+        .expect("Should have space");
+    let weights_offset = function_context
+        .get_free_space_and_write_slice(&[
+            1.0f32, 2.0f32, 3.0f32, 4.0f32, 5.0f32, 6.0f32, 7.0f32, 8.0f32, 9.0f32, 10.0f32,
+        ])
+        .expect("Should have space");
+
+    function_context
+        .content
+        .push(input_set("A", vec![unnamed_item(a_offset as usize, a_size)]));
+    function_context.content.push(input_set(
+        "W",
+        vec![unnamed_item(weights_offset as usize, weights_size)],
+    ));
+
+    let result_context = run_function!(queue, config, function_context, "B");
+
+    let output_item = result_context.content[0]
+        .as_ref()
+        .expect("Set should be present");
+    let position = output_item.buffers[0].data;
+    assert_eq!(b_size, position.size, "Checking for size of output");
+    // the output holds f32 values (4 bytes each); dividing by 8 read back only
+    // half of the matrix and left the second half unchecked
+    let mut read_buffer = vec![0f32; position.size / std::mem::size_of::<f32>()];
+    result_context
+        .context
+        .read(position.offset, &mut read_buffer)
+        .expect("Should succeed in reading");
+    let expected = vec![
+        13f32, 16f32, 19f32, 22f32, 25f32, 27f32, 34f32, 41f32, 48f32, 55f32, 41f32, 52f32, 63f32,
+        74f32, 85f32, 55f32, 70f32, 85f32, 100f32, 115f32,
+    ];
+    assert_eq!(
+        expected.len(),
+        read_buffer.len(),
+        "Checking number of output elements"
+    );
+    for (should, is) in expected.iter().zip(read_buffer.iter()) {
+        assert_eq!(should, is, "Checking final result");
+    }
+    drop(lock);
+}
diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests/compiled_tests.rs
b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests/compiled_tests.rs
new file mode 100644
index 00000000..c6b61b63
--- /dev/null
+++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests/compiled_tests.rs
@@ -0,0 +1,214 @@
+use crate::{
+    function_driver::compute_driver::gpu::gpu_tests::{
+        cuda_tests::load_models::*,
+        get_driver,
+        tests_utils::{compare_result, execute_test, get_result, setup_test},
+        GPU_LOCK,
+    },
+    memory_domain::Context,
+};
+use std::sync::Arc;
+
+/// Declares one `#[test]` that runs a precompiled model on the GPU engine and
+/// compares its output with the stored reference.
+///
+/// * `$name` - name of the generated test function
+/// * `$config` - config file name below `tests/data/hip/`
+/// * `$loader` - function that fills the context with the model inputs and
+///   returns `(output_size, output_name, expected, function_context)`
+/// * `$get_flag` - passed unchanged as last argument of `get_result`
+/// * `$compare_flag` - passed unchanged as last argument of `compare_result`
+///
+/// Every generated test holds `GPU_LOCK` for its whole duration, so the model
+/// tests never run on the GPU concurrently.
+macro_rules! gpu_model_test {
+    ($name:ident, $config:literal, $loader:ident, $get_flag:literal, $compare_flag:literal) => {
+        #[test]
+        fn $name() {
+            let lock = GPU_LOCK.lock().unwrap();
+            let filename = format!(
+                "{}/tests/data/hip/{}",
+                env!("CARGO_MANIFEST_DIR"),
+                $config
+            );
+            let (function_context, config, queue) = setup_test(&filename);
+            let (output_size, output_name, expected, function_context) =
+                $loader(function_context);
+            let result_context = execute_test(function_context, config, queue, &output_name);
+            let read_buffer = get_result(result_context, output_size, $get_flag);
+            compare_result(expected, read_buffer, $compare_flag);
+            drop(lock);
+        }
+    };
+}
+
+/// Runs a fixed selection of the model tests back to back.
+///
+/// Each callee is also a `#[test]` of its own, so these models are executed a
+/// second time whenever the whole suite runs.
+#[test]
+fn test_all() {
+    full_double_matmul();
+    alexnet();
+    lenet5();
+    resnet18();
+}
+
+gpu_model_test!(
+    full_double_matmul,
+    "test_gpu_full_double_matmul.json",
+    load_double_matmul,
+    true,
+    true
+);
+gpu_model_test!(alexnet, "test_gpu_alexnet.json", load_alexnet, true, true);
+gpu_model_test!(lenet5, "test_gpu_lenet5.json", load_lenet5, true, true);
+// same model and config as `resnet18`, but with both flags set to `false`
+gpu_model_test!(
+    resnet18base,
+    "test_gpu_resnet18.json",
+    load_resnet18,
+    false,
+    false
+);
+gpu_model_test!(resnet18, "test_gpu_resnet18.json", load_resnet18, true, true);
+gpu_model_test!(
+    resnet18batch4,
+    "test_gpu_resnet18batch4.json",
+    load_resnet18batch4,
+    true,
+    true
+);
+gpu_model_test!(
+    resnet18batch16,
+    "test_gpu_resnet18batch16.json",
+    load_resnet18batch16,
+    true,
+    true
+);
+gpu_model_test!(
+    resnet18batch64,
+    "test_gpu_resnet18batch64.json",
+    load_resnet18batch64,
+    true,
+    true
+);
+gpu_model_test!(
+    resnet18onnx,
+    "test_gpu_resnet18onnx.json",
+    load_resnet18onnx,
+    false,
+    false
+);
+gpu_model_test!(resnet34, "test_gpu_resnet34.json", load_resnet34, true, true);
+gpu_model_test!(resnet50, "test_gpu_resnet50.json", load_resnet50, false, false);
+// NOTE(review): this is the only test whose `get_result` flag (`true`) differs
+// from its `compare_result` flag (`false`); kept as found - confirm it is intended.
+gpu_model_test!(
+    resnet152,
+    "test_gpu_resnet152.json",
+    load_resnet152,
+    true,
+    false
+);
+gpu_model_test!(
+    batch_norm,
+    "test_gpu_batch_norm.json",
+    load_batch_norm,
+    false,
+    false
+);
diff --git
a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests/load_models.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests/load_models.rs
new file mode 100644
index 00000000..87f65ae5
--- /dev/null
+++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/hip_tests/load_models.rs
@@ -0,0 +1,1961 @@
+use crate::{
+    function_driver::compute_driver::gpu::gpu_tests::load_utils::{
+        add_buffer, add_empty_buffer, add_number, read_tensor_from_file,
+    },
+    memory_domain::Context,
+};
+
+pub fn load_double_matmul(mut function_context: Context) -> (usize, String, Vec, Context) { // registers the double_matmul inputs in the context and returns (output size, output name, expected values, context); NOTE(review): `Vec` has no element type here - likely lost in extraction, confirm against the real file
+    let constants_path = "/home/alrusso/pytorch-aot/processed/double_matmul/constants"; // NOTE(review): hard-coded absolute path under one user's home directory; presumably add_buffer/read_tensor_from_file read from it - consider making it configurable
+
+    add_buffer("arg2_1", 32, constants_path, &mut function_context); // NOTE(review): second argument is presumably the buffer size in bytes - confirm against add_buffer
+    add_buffer("linear1_weight", 40, constants_path, &mut function_context);
+    add_buffer("linear2_weight", 60, constants_path, &mut function_context);
+
+    let output_name: &str = "buf1";
+    let output_size: usize = 48;
+    let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); // reference output is looked up under the output buffer's name; unwrap panics if read_tensor_from_file fails
+
+    (output_size, output_name.to_string(), expected, function_context)
+}
+
+pub fn load_resnet18(mut function_context: Context) -> (usize, String, Vec, Context) { // registers the resnet18 buffers and numbers in the context and returns (output size, output name, expected values, context)
+    let constants_path = "/home/alrusso/pytorch-aot/processed/resnet18/constants"; // NOTE(review): hard-coded absolute path under one user's home directory - consider making it configurable
+
+    add_buffer("arg122_1", 602112, constants_path, &mut function_context); // NOTE(review): second argument is presumably the buffer size in bytes - confirm against add_buffer
+    add_buffer("conv1_weight", 37632, constants_path, &mut function_context);
+    add_buffer("bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("bn1_bias", 256, constants_path, &mut function_context);
+    add_number("var_9", 200704, &mut function_context); // add_number takes no constants path, only a name and a value; NOTE(review): presumably the order of these calls matters to the config - confirm
+    add_buffer("layer1_0_conv1_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_conv2_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_conv1_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_conv2_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer2_0_conv1_weight", 294912, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_conv2_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_0_weight", 32768, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_conv1_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_conv2_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer3_0_conv1_weight", 1179648, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_conv2_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_0_weight", 131072, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_conv1_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_bias", 1024, constants_path, &mut function_context);
+    add_number("var_107", 50176, &mut function_context);
+    add_buffer("layer3_1_conv2_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_bias", 1024, constants_path, &mut function_context);
+    add_number("var_117", 50176, &mut function_context);
+    add_buffer("layer4_0_conv1_weight", 4718592, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_conv2_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_0_weight", 524288, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_bias", 2048, constants_path, &mut function_context);
+    add_number("var_141", 25088, &mut function_context);
+    add_buffer("layer4_1_conv1_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_conv2_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_bias", 2048, constants_path, &mut function_context);
+    add_number("var_159", 512, &mut function_context);
+    add_number("var_160", 49, &mut function_context);
+    add_buffer("fc_bias", 4000, constants_path, &mut function_context);
+    add_buffer("fc_weight", 2048000, constants_path, &mut function_context);
+
+    let output_name: &str = "buf42";
+    let output_size: usize = 4000;
+    let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); // unwrap panics if read_tensor_from_file fails
+
+    (output_size, output_name.to_string(), expected, function_context)
+}
+
+pub fn load_resnet18batch4(mut function_context: Context) -> (usize, String, Vec, Context) { // registers the resnet18batch4 buffers and numbers in the context and returns (output size, output name, expected values, context)
+    let constants_path = "/home/alrusso/pytorch-aot/processed/resnet18batch4/constants"; // NOTE(review): hard-coded absolute path under one user's home directory - consider making it configurable
+
+    add_buffer("arg122_1", 2408448, constants_path, &mut function_context); // 2408448 = 4 * 602112, i.e. four times the size load_resnet18 uses for the same name
+    add_buffer("conv1_weight", 37632, constants_path, &mut function_context);
+    add_buffer("bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("bn1_bias", 256, constants_path, &mut function_context);
+    add_number("var_8", 3211264, &mut function_context);
+    add_number("var_11", 802816, &mut function_context);
+    add_buffer("layer1_0_conv1_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_conv2_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_conv1_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_conv2_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer2_0_conv1_weight", 294912, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_0_weight", 32768, constants_path, &mut function_context); // registered before layer2_0_conv2_weight here, the reverse of load_resnet18
+    add_buffer("layer2_0_conv2_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_conv1_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_conv2_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer3_0_conv1_weight", 1179648, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_conv2_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_0_weight", 131072, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_conv1_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_bias", 1024, constants_path, &mut function_context);
+    add_number("var_109", 200704, &mut function_context);
+    add_buffer("layer3_1_conv2_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_bias", 1024, constants_path, &mut function_context);
+    add_number("var_119", 200704, &mut function_context);
+    add_buffer("layer4_0_conv1_weight", 4718592, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_conv2_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_0_weight", 524288, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_conv1_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_bias", 2048, constants_path, &mut function_context);
+    add_number("var_150", 100352, &mut function_context);
+    add_buffer("layer4_1_conv2_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_bias", 2048, constants_path, &mut function_context);
+    add_number("var_161", 2048, &mut function_context);
+    add_number("var_162", 49, &mut function_context);
+    add_buffer("fc_bias", 4000, constants_path, &mut function_context);
+    add_buffer("fc_weight", 2048000, constants_path, &mut function_context);
+
+    let output_name: &str = "buf42";
+    let output_size: usize = 16000; // 16000 = 4 * 4000, four times load_resnet18's output size
+    let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); // unwrap panics if read_tensor_from_file fails
+
+    (output_size, output_name.to_string(), expected, function_context)
+}
+
+pub fn load_resnet18batch16(mut function_context: Context) -> (usize, String, Vec, Context) { // registers the resnet18batch16 buffers and numbers in the context and returns (output size, output name, expected values, context)
+    let constants_path = "/home/alrusso/pytorch-aot/processed/resnet18batch16/constants"; // NOTE(review): hard-coded absolute path under one user's home directory - consider making it configurable
+
+    add_buffer("arg122_1", 2408448, constants_path, &mut function_context); // NOTE(review): every size and number in this function equals the one in load_resnet18batch4 (2408448 = 4 * 602112); a batch of 16 would presumably need 16x - confirm this is not a copy-paste slip
+    add_buffer("conv1_weight", 37632, constants_path, &mut function_context);
+    add_buffer("bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("bn1_bias", 256, constants_path, &mut function_context);
+    add_number("var_8", 3211264, &mut function_context);
+    add_number("var_11", 802816, &mut function_context);
+    add_buffer("layer1_0_conv1_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn1_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_conv2_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_0_bn2_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_conv1_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn1_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_conv2_weight", 147456, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_running_mean", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_running_var", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_weight", 256, constants_path, &mut function_context);
+    add_buffer("layer1_1_bn2_bias", 256, constants_path, &mut function_context);
+    add_buffer("layer2_0_conv1_weight", 294912, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_0_weight", 32768, constants_path, &mut function_context);
+    add_buffer("layer2_0_conv2_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_bn2_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_0_downsample_1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_conv1_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn1_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_conv2_weight", 589824, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_running_mean", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_running_var", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_weight", 512, constants_path, &mut function_context);
+    add_buffer("layer2_1_bn2_bias", 512, constants_path, &mut function_context);
+    add_buffer("layer3_0_conv1_weight", 1179648, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn1_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_conv2_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_0_weight", 131072, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_bn2_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_0_downsample_1_bias", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_conv1_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn1_bias", 1024, constants_path, &mut function_context);
+    add_number("var_109", 200704, &mut function_context);
+    add_buffer("layer3_1_conv2_weight", 2359296, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_running_mean", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_running_var", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_weight", 1024, constants_path, &mut function_context);
+    add_buffer("layer3_1_bn2_bias", 1024, constants_path, &mut function_context);
+    add_number("var_119", 200704, &mut function_context);
+    add_buffer("layer4_0_conv1_weight", 4718592, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn1_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_conv2_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_0_weight", 524288, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_bn2_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_0_downsample_1_bias", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_conv1_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn1_bias", 2048, constants_path, &mut function_context);
+    add_number("var_150", 100352, &mut function_context);
+    add_buffer("layer4_1_conv2_weight", 9437184, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_running_mean", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_running_var", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_weight", 2048, constants_path, &mut function_context);
+    add_buffer("layer4_1_bn2_bias", 2048, constants_path, &mut function_context);
+    add_number("var_161", 2048, &mut function_context);
+    add_number("var_162", 49, &mut function_context);
+    add_buffer("fc_bias", 4000, constants_path, &mut function_context);
+    add_buffer("fc_weight", 2048000, constants_path, &mut function_context);
+
+    let output_name: &str = "buf42";
+    let output_size: usize = 16000; // NOTE(review): same as load_resnet18batch4 (4 * 4000); for a batch of 16 one would presumably expect 16 * 4000 = 64000 - confirm
+    let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); // unwrap panics if read_tensor_from_file fails
+
+    (output_size, output_name.to_string(), expected, function_context)
+}
+
+pub fn load_resnet18batch64(mut function_context: Context) -> (usize, String, Vec, Context) {
+    let constants_path = "/home/alrusso/pytorch-aot/processed/resnet18batch64/constants";
+
+    
add_buffer("arg122_1", 2408448, constants_path, &mut function_context); + add_buffer("conv1_weight", 37632, constants_path, &mut function_context); + add_buffer("bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("bn1_weight", 256, constants_path, &mut function_context); + add_buffer("bn1_bias", 256, constants_path, &mut function_context); + add_number("var_8", 3211264, &mut function_context); + add_number("var_11", 802816, &mut function_context); + add_buffer("layer1_0_conv1_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_0_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_1_conv1_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_1_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_running_mean", 256, constants_path, &mut function_context); + 
add_buffer("layer1_1_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer2_0_conv1_weight", 294912, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_0_weight", 32768, constants_path, &mut function_context); + add_buffer("layer2_0_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_1_conv1_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_1_conv2_weight", 
589824, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer3_0_conv1_weight", 1179648, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_0_weight", 131072, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_conv1_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_weight", 1024, 
constants_path, &mut function_context); + add_buffer("layer3_1_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_109", 200704, &mut function_context); + add_buffer("layer3_1_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_119", 200704, &mut function_context); + add_buffer("layer4_0_conv1_weight", 4718592, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_0_weight", 524288, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_bias", 2048, constants_path, &mut function_context); + 
add_buffer("layer4_1_conv1_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_bias", 2048, constants_path, &mut function_context); + add_number("var_150", 100352, &mut function_context); + add_buffer("layer4_1_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_bias", 2048, constants_path, &mut function_context); + add_number("var_161", 2048, &mut function_context); + add_number("var_162", 49, &mut function_context); + add_buffer("fc_bias", 4000, constants_path, &mut function_context); + add_buffer("fc_weight", 2048000, constants_path, &mut function_context); + + let output_name: &str = "buf42"; + let output_size: usize = 16000; + let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); + + (output_size, output_name.to_string(), expected, function_context) +} + +pub fn load_resnet18onnx(mut function_context: Context) -> (usize, String, Vec, Context) { + let constants_path = "/home/alrusso/pytorch-aot/processed/resnet18onnx/constants"; + + add_buffer("arg42_1", 602112, constants_path, &mut function_context); + add_buffer("conv1_Conv_weight", 37632, constants_path, &mut function_context); + add_buffer("conv1_Conv_bias", 256, constants_path, &mut function_context); + add_number("var_6", 200704, &mut function_context); + add_buffer("layer1_layer1_0_conv1_Conv_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_layer1_0_conv1_Conv_bias", 256, 
constants_path, &mut function_context); + add_buffer("layer1_layer1_0_conv2_Conv_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_layer1_0_conv2_Conv_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_layer1_1_conv1_Conv_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_layer1_1_conv1_Conv_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_layer1_1_conv2_Conv_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_layer1_1_conv2_Conv_bias", 256, constants_path, &mut function_context); + add_buffer("layer2_layer2_0_conv1_Conv_weight", 294912, constants_path, &mut function_context); + add_buffer("layer2_layer2_0_conv1_Conv_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_layer2_0_conv2_Conv_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_layer2_0_downsample_downsample_0_Conv_weight", 32768, constants_path, &mut function_context); + add_buffer("layer2_layer2_0_conv2_Conv_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_layer2_0_downsample_downsample_0_Conv_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_layer2_1_conv1_Conv_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_layer2_1_conv1_Conv_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_layer2_1_conv2_Conv_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_layer2_1_conv2_Conv_bias", 512, constants_path, &mut function_context); + add_buffer("layer3_layer3_0_conv1_Conv_weight", 1179648, constants_path, &mut function_context); + add_buffer("layer3_layer3_0_conv1_Conv_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_layer3_0_conv2_Conv_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_layer3_0_downsample_downsample_0_Conv_weight", 131072, constants_path, &mut 
function_context); + add_buffer("layer3_layer3_0_conv2_Conv_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_layer3_0_downsample_downsample_0_Conv_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_layer3_1_conv1_Conv_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_layer3_1_conv1_Conv_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_layer3_1_conv2_Conv_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_layer3_1_conv2_Conv_bias", 1024, constants_path, &mut function_context); + add_number("var_70", 50176, &mut function_context); + add_buffer("layer4_layer4_0_conv1_Conv_weight", 4718592, constants_path, &mut function_context); + add_buffer("layer4_layer4_0_conv1_Conv_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_layer4_0_conv2_Conv_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_layer4_0_downsample_downsample_0_Conv_weight", 524288, constants_path, &mut function_context); + add_buffer("layer4_layer4_0_conv2_Conv_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_layer4_0_downsample_downsample_0_Conv_bias", 2048, constants_path, &mut function_context); + add_number("var_85", 25088, &mut function_context); + add_buffer("layer4_layer4_1_conv1_Conv_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_layer4_1_conv1_Conv_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_layer4_1_conv2_Conv_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_layer4_1_conv2_Conv_bias", 2048, constants_path, &mut function_context); + add_number("var_97", 512, &mut function_context); + add_number("var_98", 49, &mut function_context); + add_buffer("fc_Gemm_bias", 4000, constants_path, &mut function_context); + add_buffer("fc_Gemm_weight", 2048000, constants_path, &mut function_context); + + let output_name: 
&str = "buf39"; + let output_size: usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); + + (output_size, output_name.to_string(), expected, function_context) +} + +pub fn load_resnet34(mut function_context: Context) -> (usize, String, Vec, Context) { + let constants_path = "/home/alrusso/pytorch-aot/processed/resnet34/constants"; + + add_buffer("arg218_1", 602112, constants_path, &mut function_context); + add_buffer("conv1_weight", 37632, constants_path, &mut function_context); + add_buffer("bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("bn1_weight", 256, constants_path, &mut function_context); + add_buffer("bn1_bias", 256, constants_path, &mut function_context); + add_number("var_9", 200704, &mut function_context); + add_buffer("layer1_0_conv1_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_0_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_1_conv1_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_running_var", 256, constants_path, &mut function_context); + 
add_buffer("layer1_1_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_1_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_2_conv1_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_2_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer2_0_conv1_weight", 294912, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_0_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_0_weight", 32768, constants_path, &mut 
function_context); + add_buffer("layer2_0_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_1_conv1_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_1_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_2_conv1_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_bias", 512, constants_path, &mut function_context); + 
add_buffer("layer2_2_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_3_conv1_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_3_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer3_0_conv1_weight", 1179648, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_0_weight", 131072, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_running_var", 1024, 
constants_path, &mut function_context); + add_buffer("layer3_0_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_conv1_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_152", 50176, &mut function_context); + add_buffer("layer3_1_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_162", 50176, &mut function_context); + add_buffer("layer3_2_conv1_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_171", 50176, &mut 
function_context); + add_buffer("layer3_2_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_181", 50176, &mut function_context); + add_buffer("layer3_3_conv1_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_190", 50176, &mut function_context); + add_buffer("layer3_3_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_200", 50176, &mut function_context); + add_buffer("layer3_4_conv1_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_209", 50176, &mut function_context); + add_buffer("layer3_4_conv2_weight", 2359296, constants_path, 
&mut function_context); + add_buffer("layer3_4_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_219", 50176, &mut function_context); + add_buffer("layer3_5_conv1_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_228", 50176, &mut function_context); + add_buffer("layer3_5_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_238", 50176, &mut function_context); + add_buffer("layer4_0_conv1_weight", 4718592, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_0_weight", 524288, constants_path, &mut function_context); + 
add_buffer("layer4_0_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_bias", 2048, constants_path, &mut function_context); + add_number("var_262", 25088, &mut function_context); + add_buffer("layer4_1_conv1_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_bias", 2048, constants_path, &mut function_context); + add_number("var_279", 25088, &mut function_context); + add_buffer("layer4_2_conv1_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_weight", 2048, constants_path, &mut 
function_context); + add_buffer("layer4_2_bn1_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_bias", 2048, constants_path, &mut function_context); + add_number("var_297", 512, &mut function_context); + add_number("var_298", 49, &mut function_context); + add_buffer("fc_bias", 4000, constants_path, &mut function_context); + add_buffer("fc_weight", 2048000, constants_path, &mut function_context); + + let output_name: &str = "buf74"; + let output_size: usize = 4000; + let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); + + (output_size, output_name.to_string(), expected, function_context) +} + +
/// Fills `function_context` with the ResNet-50 entries and returns what a caller needs to
/// check a run against a stored reference.
///
/// The body is one fixed sequence of `add_buffer(name, size, constants_path, ..)` and
/// `add_number(name, value, ..)` calls, all issued against the same context.
/// NOTE(review): whether the callees depend on this registration order is not visible from
/// here - confirm before reordering or deduplicating entries.
///
/// # Returns
/// `(output_size, output_name, expected, function_context)`: the literal `4000`, the name
/// `"buf108"` as an owned `String`, the value obtained from
/// `read_tensor_from_file("buf108", constants_path)`, and the context passed in, handed back
/// after all registrations.
///
/// # Panics
/// Panics if `read_tensor_from_file` does not yield a value, because its result is unwrapped.
///
/// NOTE(review): `Vec` carries no element type in the signature or in `expected` in this copy -
/// presumably stripped during extraction; confirm against the repository.
pub fn load_resnet50(mut function_context: Context) -> (usize, String, Vec, Context) { +
// NOTE(review): absolute, user-specific directory; this function only works on a machine
// where this exact path exists - consider making it configurable.
let constants_path = "/home/alrusso/pytorch-aot/processed/resnet50/constants"; + + add_buffer("arg320_1", 602112, constants_path, &mut function_context); + add_buffer("conv1_weight", 37632, constants_path, &mut function_context); + add_buffer("bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("bn1_weight", 256, constants_path, &mut function_context); + add_buffer("bn1_bias", 256, constants_path, &mut function_context); + add_number("var_9", 200704, &mut function_context); + add_buffer("layer1_0_conv1_weight", 16384, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_weight", 256, constants_path, &mut
function_context); + add_buffer("layer1_0_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_0_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_0_conv3_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_0_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_0_bn3_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_bn3_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_bn3_weight", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_bn3_bias", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_1_bias", 1024, constants_path, &mut function_context); + add_buffer("layer1_1_conv1_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_1_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_running_mean", 256, constants_path, &mut function_context); +
add_buffer("layer1_1_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_1_conv3_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_1_bn3_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer1_1_bn3_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer1_1_bn3_weight", 1024, constants_path, &mut function_context); + add_buffer("layer1_1_bn3_bias", 1024, constants_path, &mut function_context); + add_buffer("layer1_2_conv1_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_2_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_2_conv3_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_2_bn3_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer1_2_bn3_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer1_2_bn3_weight", 1024, constants_path, &mut function_context); + add_buffer("layer1_2_bn3_bias", 1024, constants_path, &mut function_context); + add_buffer("layer2_0_conv1_weight", 131072, constants_path, &mut
function_context); + add_buffer("layer2_0_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_0_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_0_weight", 524288, constants_path, &mut function_context); + add_buffer("layer2_0_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_0_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_1_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_weight", 512, constants_path, &mut function_context);
+ add_buffer("layer2_1_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_1_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_1_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_1_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_1_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_1_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_1_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_2_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_2_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_2_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_2_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_2_bn3_running_var", 2048, constants_path,
&mut function_context); + add_buffer("layer2_2_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_2_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_3_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_3_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_3_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_3_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_3_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_3_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_3_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer3_0_conv1_weight", 524288, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_running_mean",
1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_193", 50176, &mut function_context); + add_buffer("layer3_0_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_0_weight", 2097152, constants_path, &mut function_context); + add_buffer("layer3_0_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_214", 50176, &mut function_context); + add_buffer("layer3_1_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_weight", 1024, constants_path, &mut function_context); +
add_buffer("layer3_1_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_223", 50176, &mut function_context); + add_buffer("layer3_1_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_1_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_1_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_1_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_1_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_237", 50176, &mut function_context); + add_buffer("layer3_2_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_246", 50176, &mut function_context); + add_buffer("layer3_2_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_2_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_2_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_2_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_2_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_running_var", 1024,
constants_path, &mut function_context); + add_buffer("layer3_3_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_260", 50176, &mut function_context); + add_buffer("layer3_3_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_269", 50176, &mut function_context); + add_buffer("layer3_3_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_3_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_3_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_3_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_3_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_283", 50176, &mut function_context); + add_buffer("layer3_4_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_292",
50176, &mut function_context); + add_buffer("layer3_4_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_4_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_4_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_4_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_4_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_306", 50176, &mut function_context); + add_buffer("layer3_5_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_315", 50176, &mut function_context); + add_buffer("layer3_5_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_5_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_5_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_5_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_5_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_weight", 2048, constants_path, &mut
function_context); + add_buffer("layer4_0_bn1_bias", 2048, constants_path, &mut function_context); + add_number("var_329", 100352, &mut function_context); + add_buffer("layer4_0_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_0_weight", 8388608, constants_path, &mut function_context); + add_buffer("layer4_0_bn3_running_mean", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_bn3_running_var", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_bn3_weight", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_bn3_bias", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_running_mean", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_running_var", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_weight", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_bias", 8192, constants_path, &mut function_context); + add_number("var_350", 100352, &mut function_context); + add_buffer("layer4_1_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_bias", 2048, constants_path, &mut function_context); + add_number("var_356", 25088, &mut function_context); + add_buffer("layer4_1_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_running_mean", 2048, constants_path, &mut
function_context); + add_buffer("layer4_1_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn3_running_mean", 8192, constants_path, &mut function_context); + add_buffer("layer4_1_bn3_running_var", 8192, constants_path, &mut function_context); + add_buffer("layer4_1_bn3_weight", 8192, constants_path, &mut function_context); + add_buffer("layer4_1_bn3_bias", 8192, constants_path, &mut function_context); + add_number("var_370", 100352, &mut function_context); + add_buffer("layer4_2_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_bias", 2048, constants_path, &mut function_context); + add_number("var_376", 25088, &mut function_context); + add_buffer("layer4_2_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn3_running_mean", 8192, constants_path, &mut function_context); + add_buffer("layer4_2_bn3_running_var", 8192, constants_path, &mut function_context); + add_buffer("layer4_2_bn3_weight", 8192, constants_path, &mut function_context); + add_buffer("layer4_2_bn3_bias", 8192, constants_path, &mut function_context); + add_number("var_391", 2048, &mut function_context); + add_number("var_392", 49, &mut function_context); + add_buffer("fc_bias", 4000, constants_path, &mut function_context); +
add_buffer("fc_weight", 8192000, constants_path, &mut function_context); + +
// Name used to look up the reference tensor below, and the size reported to the caller;
// 4000 equals the size passed for "fc_bias" above.
let output_name: &str = "buf108"; + let output_size: usize = 4000; +
// NOTE(review): unwrap() turns a failed read of the reference tensor into a panic;
// the return type gives the caller no way to observe that failure otherwise.
let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); + + (output_size, output_name.to_string(), expected, function_context) +} + +pub fn load_resnet152(mut function_context: Context) -> (usize, String, Vec, Context) { + let constants_path = "/home/alrusso/pytorch-aot/processed/resnet152/constants"; + + add_buffer("arg932_1", 602112, constants_path, &mut function_context); + add_buffer("conv1_weight", 37632, constants_path, &mut function_context); + add_buffer("bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("bn1_weight", 256, constants_path, &mut function_context); + add_buffer("bn1_bias", 256, constants_path, &mut function_context); + add_number("var_9", 200704, &mut function_context); + add_buffer("layer1_0_conv1_weight", 16384, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_0_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_0_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_0_conv3_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_0_weight", 65536, constants_path, &mut function_context); +
add_buffer("layer1_0_bn3_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_bn3_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_bn3_weight", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_bn3_bias", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer1_0_downsample_1_bias", 1024, constants_path, &mut function_context); + add_buffer("layer1_1_conv1_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_1_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_1_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_1_conv3_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_1_bn3_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer1_1_bn3_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer1_1_bn3_weight", 1024, constants_path, &mut function_context); + add_buffer("layer1_1_bn3_bias", 1024, constants_path, &mut function_context); + 
add_buffer("layer1_2_conv1_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn1_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_2_conv2_weight", 147456, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_running_mean", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_running_var", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_weight", 256, constants_path, &mut function_context); + add_buffer("layer1_2_bn2_bias", 256, constants_path, &mut function_context); + add_buffer("layer1_2_conv3_weight", 65536, constants_path, &mut function_context); + add_buffer("layer1_2_bn3_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer1_2_bn3_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer1_2_bn3_weight", 1024, constants_path, &mut function_context); + add_buffer("layer1_2_bn3_bias", 1024, constants_path, &mut function_context); + add_buffer("layer2_0_conv1_weight", 131072, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_0_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_0_bn2_weight", 512, constants_path, &mut 
function_context); + add_buffer("layer2_0_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_0_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_0_weight", 524288, constants_path, &mut function_context); + add_buffer("layer2_0_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_0_downsample_1_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_1_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_1_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_1_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_1_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_1_bn3_running_mean", 2048, constants_path, &mut function_context); 
+ add_buffer("layer2_1_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_1_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_1_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_2_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_2_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_2_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_2_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_2_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_2_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_2_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_2_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_3_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_3_conv2_weight", 589824, constants_path, &mut 
function_context); + add_buffer("layer2_3_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_3_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_3_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_3_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_3_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_3_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_3_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_4_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_4_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_4_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_4_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_4_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_4_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_4_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_4_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_4_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_4_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_4_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_4_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_4_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_4_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_4_bn3_bias", 2048, 
constants_path, &mut function_context); + add_buffer("layer2_5_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_5_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_5_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_5_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_5_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_5_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_5_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_5_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_5_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_5_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_5_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_5_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_5_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_5_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_5_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_6_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_6_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_6_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_6_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_6_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_6_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_6_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_6_bn2_running_var", 512, constants_path, &mut function_context); + 
add_buffer("layer2_6_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_6_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_6_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_6_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_6_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_6_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_6_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer2_7_conv1_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_7_bn1_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_7_bn1_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_7_bn1_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_7_bn1_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_7_conv2_weight", 589824, constants_path, &mut function_context); + add_buffer("layer2_7_bn2_running_mean", 512, constants_path, &mut function_context); + add_buffer("layer2_7_bn2_running_var", 512, constants_path, &mut function_context); + add_buffer("layer2_7_bn2_weight", 512, constants_path, &mut function_context); + add_buffer("layer2_7_bn2_bias", 512, constants_path, &mut function_context); + add_buffer("layer2_7_conv3_weight", 262144, constants_path, &mut function_context); + add_buffer("layer2_7_bn3_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer2_7_bn3_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer2_7_bn3_weight", 2048, constants_path, &mut function_context); + add_buffer("layer2_7_bn3_bias", 2048, constants_path, &mut function_context); + add_buffer("layer3_0_conv1_weight", 524288, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_running_mean", 1024, constants_path, &mut 
function_context); + add_buffer("layer3_0_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn1_bias", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_0_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_281", 50176, &mut function_context); + add_buffer("layer3_0_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_0_weight", 2097152, constants_path, &mut function_context); + add_buffer("layer3_0_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_0_downsample_1_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_302", 
50176, &mut function_context); + add_buffer("layer3_1_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_1_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_311", 50176, &mut function_context); + add_buffer("layer3_1_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_1_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_1_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_1_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_1_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_325", 50176, &mut function_context); + add_buffer("layer3_2_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_2_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_334", 50176, &mut function_context); + add_buffer("layer3_2_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_2_bn3_running_mean", 4096, constants_path, &mut function_context); + 
add_buffer("layer3_2_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_2_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_2_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_348", 50176, &mut function_context); + add_buffer("layer3_3_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_3_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_357", 50176, &mut function_context); + add_buffer("layer3_3_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_3_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_3_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_3_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_3_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_371", 50176, &mut function_context); + add_buffer("layer3_4_conv2_weight", 2359296, constants_path, 
&mut function_context); + add_buffer("layer3_4_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_4_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_380", 50176, &mut function_context); + add_buffer("layer3_4_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_4_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_4_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_4_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_4_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_394", 50176, &mut function_context); + add_buffer("layer3_5_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_5_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_403", 50176, &mut function_context); + add_buffer("layer3_5_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_5_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_5_bn3_running_var", 4096, constants_path, &mut function_context); + 
add_buffer("layer3_5_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_5_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_6_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_6_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_6_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_6_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_417", 50176, &mut function_context); + add_buffer("layer3_6_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_6_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_6_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_6_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_6_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_426", 50176, &mut function_context); + add_buffer("layer3_6_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_6_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_6_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_6_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_6_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_7_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_7_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_7_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_7_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_440", 50176, &mut function_context); + add_buffer("layer3_7_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_7_bn2_running_mean", 1024, constants_path, 
&mut function_context); + add_buffer("layer3_7_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_7_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_7_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_449", 50176, &mut function_context); + add_buffer("layer3_7_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_7_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_7_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_7_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_7_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_8_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_8_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_8_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_8_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_463", 50176, &mut function_context); + add_buffer("layer3_8_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_8_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_8_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_8_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_8_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_472", 50176, &mut function_context); + add_buffer("layer3_8_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_8_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_8_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_8_bn3_weight", 4096, constants_path, &mut function_context); + 
add_buffer("layer3_8_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_9_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_9_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_9_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_9_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_486", 50176, &mut function_context); + add_buffer("layer3_9_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_9_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_9_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_9_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_9_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_495", 50176, &mut function_context); + add_buffer("layer3_9_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_9_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_9_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_9_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_9_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_10_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_10_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_10_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_10_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_509", 50176, &mut function_context); + add_buffer("layer3_10_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_10_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_10_bn2_running_var", 1024, 
constants_path, &mut function_context); + add_buffer("layer3_10_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_10_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_518", 50176, &mut function_context); + add_buffer("layer3_10_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_10_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_10_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_10_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_10_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_11_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_11_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_11_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_11_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_532", 50176, &mut function_context); + add_buffer("layer3_11_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_11_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_11_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_11_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_11_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_541", 50176, &mut function_context); + add_buffer("layer3_11_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_11_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_11_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_11_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_11_bn3_bias", 4096, constants_path, &mut function_context); + 
add_buffer("layer3_12_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_12_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_12_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_12_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_555", 50176, &mut function_context); + add_buffer("layer3_12_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_12_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_12_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_12_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_12_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_564", 50176, &mut function_context); + add_buffer("layer3_12_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_12_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_12_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_12_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_12_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_13_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_13_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_13_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_13_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_578", 50176, &mut function_context); + add_buffer("layer3_13_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_13_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_13_bn2_running_var", 1024, constants_path, &mut function_context); + 
add_buffer("layer3_13_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_13_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_587", 50176, &mut function_context); + add_buffer("layer3_13_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_13_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_13_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_13_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_13_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_14_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_14_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_14_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_14_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_601", 50176, &mut function_context); + add_buffer("layer3_14_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_14_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_14_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_14_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_14_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_610", 50176, &mut function_context); + add_buffer("layer3_14_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_14_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_14_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_14_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_14_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_15_bn1_running_mean", 
1024, constants_path, &mut function_context); + add_buffer("layer3_15_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_15_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_15_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_624", 50176, &mut function_context); + add_buffer("layer3_15_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_15_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_15_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_15_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_15_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_633", 50176, &mut function_context); + add_buffer("layer3_15_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_15_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_15_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_15_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_15_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_16_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_16_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_16_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_16_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_647", 50176, &mut function_context); + add_buffer("layer3_16_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_16_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_16_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_16_bn2_weight", 1024, constants_path, &mut 
function_context); + add_buffer("layer3_16_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_656", 50176, &mut function_context); + add_buffer("layer3_16_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_16_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_16_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_16_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_16_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_17_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_17_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_17_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_17_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_670", 50176, &mut function_context); + add_buffer("layer3_17_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_17_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_17_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_17_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_17_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_679", 50176, &mut function_context); + add_buffer("layer3_17_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_17_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_17_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_17_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_17_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_18_bn1_running_mean", 1024, constants_path, &mut function_context); + 
add_buffer("layer3_18_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_18_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_18_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_693", 50176, &mut function_context); + add_buffer("layer3_18_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_18_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_18_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_18_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_18_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_702", 50176, &mut function_context); + add_buffer("layer3_18_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_18_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_18_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_18_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_18_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_19_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_19_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_19_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_19_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_716", 50176, &mut function_context); + add_buffer("layer3_19_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_19_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_19_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_19_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_19_bn2_bias", 
1024, constants_path, &mut function_context); + add_number("var_725", 50176, &mut function_context); + add_buffer("layer3_19_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_19_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_19_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_19_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_19_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_20_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_20_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_20_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_20_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_739", 50176, &mut function_context); + add_buffer("layer3_20_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_20_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_20_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_20_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_20_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_748", 50176, &mut function_context); + add_buffer("layer3_20_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_20_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_20_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_20_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_20_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_21_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_21_bn1_running_var", 1024, constants_path, &mut 
function_context); + add_buffer("layer3_21_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_21_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_762", 50176, &mut function_context); + add_buffer("layer3_21_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_21_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_21_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_21_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_21_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_771", 50176, &mut function_context); + add_buffer("layer3_21_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_21_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_21_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_21_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_21_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_22_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_22_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_22_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_22_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_785", 50176, &mut function_context); + add_buffer("layer3_22_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_22_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_22_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_22_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_22_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_794", 
50176, &mut function_context); + add_buffer("layer3_22_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_22_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_22_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_22_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_22_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_23_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_23_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_23_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_23_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_808", 50176, &mut function_context); + add_buffer("layer3_23_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_23_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_23_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_23_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_23_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_817", 50176, &mut function_context); + add_buffer("layer3_23_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_23_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_23_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_23_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_23_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_24_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_24_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_24_bn1_weight", 1024, 
constants_path, &mut function_context); + add_buffer("layer3_24_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_831", 50176, &mut function_context); + add_buffer("layer3_24_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_24_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_24_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_24_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_24_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_840", 50176, &mut function_context); + add_buffer("layer3_24_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_24_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_24_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_24_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_24_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_25_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_25_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_25_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_25_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_854", 50176, &mut function_context); + add_buffer("layer3_25_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_25_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_25_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_25_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_25_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_863", 50176, &mut function_context); + 
add_buffer("layer3_25_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_25_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_25_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_25_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_25_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_26_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_26_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_26_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_26_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_877", 50176, &mut function_context); + add_buffer("layer3_26_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_26_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_26_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_26_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_26_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_886", 50176, &mut function_context); + add_buffer("layer3_26_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_26_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_26_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_26_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_26_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_27_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_27_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_27_bn1_weight", 1024, constants_path, &mut function_context); + 
add_buffer("layer3_27_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_900", 50176, &mut function_context); + add_buffer("layer3_27_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_27_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_27_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_27_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_27_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_909", 50176, &mut function_context); + add_buffer("layer3_27_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_27_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_27_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_27_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_27_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_28_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_28_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_28_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_28_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_923", 50176, &mut function_context); + add_buffer("layer3_28_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_28_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_28_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_28_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_28_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_932", 50176, &mut function_context); + add_buffer("layer3_28_conv3_weight", 1048576, constants_path, &mut 
function_context); + add_buffer("layer3_28_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_28_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_28_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_28_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_29_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_29_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_29_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_29_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_946", 50176, &mut function_context); + add_buffer("layer3_29_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_29_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_29_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_29_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_29_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_955", 50176, &mut function_context); + add_buffer("layer3_29_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_29_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_29_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_29_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_29_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_30_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_30_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_30_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_30_bn1_bias", 1024, constants_path, &mut 
function_context); + add_number("var_969", 50176, &mut function_context); + add_buffer("layer3_30_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_30_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_30_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_30_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_30_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_978", 50176, &mut function_context); + add_buffer("layer3_30_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_30_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_30_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_30_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_30_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_31_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_31_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_31_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_31_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_992", 50176, &mut function_context); + add_buffer("layer3_31_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_31_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_31_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_31_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_31_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_1001", 50176, &mut function_context); + add_buffer("layer3_31_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_31_bn3_running_mean", 
4096, constants_path, &mut function_context); + add_buffer("layer3_31_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_31_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_31_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_32_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_32_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_32_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_32_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_1015", 50176, &mut function_context); + add_buffer("layer3_32_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_32_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_32_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_32_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_32_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_1024", 50176, &mut function_context); + add_buffer("layer3_32_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_32_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_32_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_32_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_32_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_33_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_33_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_33_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_33_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_1038", 50176, &mut 
function_context); + add_buffer("layer3_33_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_33_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_33_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_33_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_33_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_1047", 50176, &mut function_context); + add_buffer("layer3_33_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_33_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_33_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_33_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_33_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_34_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_34_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_34_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_34_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_1061", 50176, &mut function_context); + add_buffer("layer3_34_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_34_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_34_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_34_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_34_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_1070", 50176, &mut function_context); + add_buffer("layer3_34_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_34_bn3_running_mean", 4096, constants_path, &mut function_context); + 
add_buffer("layer3_34_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_34_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_34_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer3_35_bn1_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_35_bn1_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_35_bn1_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_35_bn1_bias", 1024, constants_path, &mut function_context); + add_number("var_1084", 50176, &mut function_context); + add_buffer("layer3_35_conv2_weight", 2359296, constants_path, &mut function_context); + add_buffer("layer3_35_bn2_running_mean", 1024, constants_path, &mut function_context); + add_buffer("layer3_35_bn2_running_var", 1024, constants_path, &mut function_context); + add_buffer("layer3_35_bn2_weight", 1024, constants_path, &mut function_context); + add_buffer("layer3_35_bn2_bias", 1024, constants_path, &mut function_context); + add_number("var_1093", 50176, &mut function_context); + add_buffer("layer3_35_conv3_weight", 1048576, constants_path, &mut function_context); + add_buffer("layer3_35_bn3_running_mean", 4096, constants_path, &mut function_context); + add_buffer("layer3_35_bn3_running_var", 4096, constants_path, &mut function_context); + add_buffer("layer3_35_bn3_weight", 4096, constants_path, &mut function_context); + add_buffer("layer3_35_bn3_bias", 4096, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn1_bias", 2048, constants_path, &mut function_context); + add_number("var_1107", 100352, &mut function_context); + add_buffer("layer4_0_conv2_weight", 
9437184, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_bn2_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_0_weight", 8388608, constants_path, &mut function_context); + add_buffer("layer4_0_bn3_running_mean", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_bn3_running_var", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_bn3_weight", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_bn3_bias", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_running_mean", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_running_var", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_weight", 8192, constants_path, &mut function_context); + add_buffer("layer4_0_downsample_1_bias", 8192, constants_path, &mut function_context); + add_number("var_1128", 100352, &mut function_context); + add_buffer("layer4_1_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn1_bias", 2048, constants_path, &mut function_context); + add_number("var_1134", 25088, &mut function_context); + add_buffer("layer4_1_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn2_weight", 2048, constants_path, &mut function_context); + 
add_buffer("layer4_1_bn2_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_1_bn3_running_mean", 8192, constants_path, &mut function_context); + add_buffer("layer4_1_bn3_running_var", 8192, constants_path, &mut function_context); + add_buffer("layer4_1_bn3_weight", 8192, constants_path, &mut function_context); + add_buffer("layer4_1_bn3_bias", 8192, constants_path, &mut function_context); + add_number("var_1148", 100352, &mut function_context); + add_buffer("layer4_2_bn1_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn1_bias", 2048, constants_path, &mut function_context); + add_number("var_1154", 25088, &mut function_context); + add_buffer("layer4_2_conv2_weight", 9437184, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_running_mean", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_running_var", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_weight", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn2_bias", 2048, constants_path, &mut function_context); + add_buffer("layer4_2_bn3_running_mean", 8192, constants_path, &mut function_context); + add_buffer("layer4_2_bn3_running_var", 8192, constants_path, &mut function_context); + add_buffer("layer4_2_bn3_weight", 8192, constants_path, &mut function_context); + add_buffer("layer4_2_bn3_bias", 8192, constants_path, &mut function_context); + add_number("var_1169", 2048, &mut function_context); + add_number("var_1170", 49, &mut function_context); + add_buffer("fc_bias", 4000, constants_path, &mut function_context); + add_buffer("fc_weight", 8192000, constants_path, &mut function_context); + + let output_name: &str = "buf312"; + let output_size: usize = 4000; + let expected: Vec = 
read_tensor_from_file(output_name, constants_path).unwrap(); + + (output_size, output_name.to_string(), expected, function_context) +} + +pub fn load_alexnet(mut function_context: Context) -> (usize, String, Vec, Context) { + let constants_path = "/home/alrusso/pytorch-aot/processed/alexnet/constants"; + + add_buffer("arg16_1", 618348, constants_path, &mut function_context); + add_buffer("features_0_weight", 92928, constants_path, &mut function_context); + add_buffer("features_0_bias", 256, constants_path, &mut function_context); + add_number("var_6", 46656, &mut function_context); + add_buffer("features_3_weight", 1228800, constants_path, &mut function_context); + add_buffer("features_3_bias", 768, constants_path, &mut function_context); + add_number("var_13", 32448, &mut function_context); + add_buffer("features_6_weight", 2654208, constants_path, &mut function_context); + add_buffer("features_6_bias", 1536, constants_path, &mut function_context); + add_buffer("features_8_weight", 3538944, constants_path, &mut function_context); + add_buffer("features_8_bias", 1024, constants_path, &mut function_context); + add_buffer("features_10_weight", 2359296, constants_path, &mut function_context); + add_buffer("features_10_bias", 1024, constants_path, &mut function_context); + add_number("var_28", 9216, &mut function_context); + add_buffer("classifier_1_weight", 150994944, constants_path, &mut function_context); + add_buffer("classifier_1_bias", 16384, constants_path, &mut function_context); + add_buffer("classifier_4_weight", 67108864, constants_path, &mut function_context); + add_buffer("classifier_4_bias", 16384, constants_path, &mut function_context); + add_buffer("classifier_6_bias", 40, constants_path, &mut function_context); + add_buffer("classifier_6_weight", 163840, constants_path, &mut function_context); + + let output_name: &str = "buf18"; + let output_size: usize = 40; + let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); + + 
(output_size, output_name.to_string(), expected, function_context) +} + +pub fn load_lenet5(mut function_context: Context) -> (usize, String, Vec, Context) { + let constants_path = "/home/alrusso/pytorch-aot/processed/lenet5/constants"; + + add_number("var_1", 6, &mut function_context); + add_buffer("arg10_1", 3136, constants_path, &mut function_context); + add_buffer("conv1_weight", 600, constants_path, &mut function_context); + add_number("var_8", 1176, &mut function_context); + add_buffer("conv2_weight", 9600, constants_path, &mut function_context); + add_buffer("conv2_bias", 64, constants_path, &mut function_context); + add_number("var_15", 400, &mut function_context); + add_buffer("fc1_weight", 192000, constants_path, &mut function_context); + add_buffer("fc1_bias", 480, constants_path, &mut function_context); + add_buffer("fc2_weight", 40320, constants_path, &mut function_context); + add_buffer("fc2_bias", 336, constants_path, &mut function_context); + add_buffer("fc3_bias", 40, constants_path, &mut function_context); + add_buffer("fc3_weight", 3360, constants_path, &mut function_context); + + let output_name: &str = "buf11"; + let output_size: usize = 40; + let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); + + (output_size, output_name.to_string(), expected, function_context) +} + +pub fn load_batch_norm(mut function_context: Context) -> (usize, String, Vec, Context) { + let constants_path = "/home/alrusso/pytorch-aot/processed/batch_norm/constants"; + + add_buffer("arg9_1", 6400, constants_path, &mut function_context); + add_buffer("conv_weight", 147456, constants_path, &mut function_context); + add_buffer("conv_bias", 256, constants_path, &mut function_context); + add_buffer("bn_running_mean", 256, constants_path, &mut function_context); + add_buffer("bn_running_var", 256, constants_path, &mut function_context); + add_buffer("bn_weight", 256, constants_path, &mut function_context); + add_buffer("bn_bias", 256, constants_path, 
&mut function_context); + add_number("var_10", 256, &mut function_context); + add_number("var_12", 3, &mut function_context); + add_buffer("fc_weight", 3072, constants_path, &mut function_context); + + let output_name: &str = "buf5"; + let output_size: usize = 12; + let expected: Vec = read_tensor_from_file(output_name, constants_path).unwrap(); + + (output_size, output_name.to_string(), expected, function_context) +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/load_utils.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/load_utils.rs new file mode 100644 index 00000000..44ec5e36 --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/load_utils.rs @@ -0,0 +1,75 @@ +use std::{ + fs::File, + io::{self, Read}, + mem, +}; + +use crate::{memory_domain::Context, DataItem, DataSet, Position}; + +pub fn read_tensor_from_file(file_path: &str, file_folder: &str) -> io::Result> { + let full_path = format!("{file_folder}/{file_path}.bin"); + + let mut file = File::open(&full_path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + + let floats: &[f32] = unsafe { + std::slice::from_raw_parts( + buffer.as_ptr() as *const f32, + buffer.len() / mem::size_of::(), + ) + }; + + Ok(floats.to_vec()) +} + +pub fn add_number(name: &str, value: i64, function_context: &mut Context) { + let offset = function_context + .get_free_space_and_write_slice(&vec![value as i64]) + .expect("Should have space"); + function_context.content.push(Some(DataSet { + ident: name.to_string(), + buffers: vec![DataItem { + ident: name.to_string(), + data: Position { + offset: offset as usize, + size: 8, + }, + key: 0, + }], + })); +} + +pub fn add_empty_buffer(name: &str, size: usize, function_context: &mut Context) { + let offset = function_context + .get_free_space_and_write_slice(&vec![0f32; size / 4]) + .expect("Should have space"); + function_context.content.push(Some(DataSet { + ident: 
name.to_string(), + buffers: vec![DataItem { + ident: name.to_string(), + data: Position { + offset: offset as usize, + size: size, + }, + key: 0, + }], + })); +} + +pub fn add_buffer(name: &str, size: usize, path: &str, function_context: &mut Context) { + let offset = function_context + .get_free_space_and_write_slice(&read_tensor_from_file(&name, &path).unwrap()) + .expect("Should have space"); + function_context.content.push(Some(DataSet { + ident: name.to_string(), + buffers: vec![DataItem { + ident: name.to_string(), + data: Position { + offset: offset as usize, + size: size, + }, + key: 0, + }], + })); +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/tests_utils.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/tests_utils.rs new file mode 100644 index 00000000..47d91518 --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_tests/tests_utils.rs @@ -0,0 +1,140 @@ +use crate::{ + function_driver::{ + compute_driver::{ + compute_driver_tests::compute_driver_tests::prepare_engine_and_function, + gpu::gpu_tests::{get_driver, Archive, ArchiveInit, RecordPoint}, + }, + test_queue::TestQueue, + Arc, ComputeResource, Driver, FunctionConfig, WorkToDo, + }, + memory_domain::{gpu::GpuMemoryDomain, Context, ContextTrait, MemoryResource}, +}; + +pub fn setup_test(filename: &str) -> (Context, FunctionConfig, Box) { + let dom_init = MemoryResource::Shared { + id: 0, + size: (1 << 38), // TODO(GPU) : choose a good value for the context size + }; + let driver: Box = get_driver(); + let drv_init = vec![ComputeResource::GPU(7, 2, 2)]; + + prepare_engine_and_function::(filename, dom_init, &driver, drv_init) +} + +pub fn execute_test( + function_context: Context, + config: FunctionConfig, + queue: Box, + output_name: &str, +) -> Context { + let archive = Box::leak(Box::new(Archive::init(ArchiveInit { + #[cfg(feature = "timestamp")] + timestamp_count: 1000, + }))); + let mut recorder = 
archive.get_recorder().unwrap(); + recorder + .record(RecordPoint::TransferEnd) + .expect("Should have properly initialized recorder state"); + let promise = queue.enqueu(WorkToDo::FunctionArguments { + config, + context: function_context, + output_sets: Arc::new(vec![String::from(output_name)]), + recorder: recorder.get_sub_recorder().unwrap(), + }); + queue.enqueu(WorkToDo::Shutdown()); + let result_context = tokio::runtime::Builder::new_current_thread() + .build() + .unwrap() + .block_on(promise) + .expect("Engine should run ok with basic function") + .get_context(); + recorder + .record(RecordPoint::FutureReturn) + .expect("Should have properly advanced recorder state"); + result_context +} + +pub fn get_result(result_context: Context, output_size: usize, asserts: bool) -> Vec { + let output_item = result_context.content[0] + .as_ref() + .expect("Set should be present"); + let position = output_item.buffers[0].data; + if asserts { + assert_eq!(output_size, position.size, "Checking for size of output"); + } else { + println!( + "Expected output size: {output_size}\t\tActual size: {0}", + position.size + ); + } + let mut read_buffer = vec![0f32; position.size / 4]; + result_context + .context + .read(position.offset, &mut read_buffer) + .expect("Should succeed in reading"); + read_buffer +} + +pub fn get_resulti32(result_context: Context, output_size: usize, asserts: bool) -> Vec { + let output_item = result_context.content[0] + .as_ref() + .expect("Set should be present"); + let position = output_item.buffers[0].data; + if asserts { + assert_eq!(output_size, position.size, "Checking for size of output"); + } else { + println!( + "Expected output size: {output_size}\t\tActual size: {0}", + position.size + ); + } + let mut read_buffer = vec![0i32; position.size / 4]; + result_context + .context + .read(position.offset, &mut read_buffer) + .expect("Should succeed in reading"); + read_buffer +} + +pub fn get_resulti64(result_context: Context, output_size: usize, 
asserts: bool) -> Vec { + let output_item = result_context.content[0] + .as_ref() + .expect("Set should be present"); + let position = output_item.buffers[0].data; + if asserts { + assert_eq!(output_size, position.size, "Checking for size of output"); + } else { + println!( + "Expected output size: {output_size}\t\tActual size: {0}", + position.size + ); + } + let mut read_buffer = vec![0i64; position.size / 8]; + result_context + .context + .read(position.offset, &mut read_buffer) + .expect("Should succeed in reading"); + read_buffer +} + +pub fn compare_result(expected: Vec, read_buffer: Vec, asserts: bool) { + const DELTA: f32 = 0.003; + for (should, is) in expected.iter().zip(read_buffer.iter()) { + let abs_diff = (should - is).abs(); + let mut ratio = should / is; + if ratio.is_nan() { + ratio = 1.0; + } + let diff_ratio = ratio - 1.0; + let abs_diff_ratio = diff_ratio.abs(); + if asserts { + assert!( + abs_diff_ratio <= DELTA, + "Checking final result: {should} - {is}" + ); + } else { + println!("{should:10.3}\t{is:10.3}\t{abs_diff:10.3}\t{ratio:10.3}"); + } + } + println!("Correct result!"); +} diff --git a/machine_interface/src/function_driver/compute_driver/gpu/gpu_utils.rs b/machine_interface/src/function_driver/compute_driver/gpu/gpu_utils.rs new file mode 100644 index 00000000..23c10774 --- /dev/null +++ b/machine_interface/src/function_driver/compute_driver/gpu/gpu_utils.rs @@ -0,0 +1,545 @@ +use super::{ + buffer_pool::BufferPool, + config_parsing::Sizing, + gpu_api::{self, DevicePointer}, +}; +use crate::{ + function_driver::{ComputeResource, FunctionConfig, GpuConfig, WorkDone, WorkQueue, WorkToDo}, + interface::read_output_structs, + memory_domain::{self, Context, ContextState, ContextTrait, ContextType}, + DataItem, DataSet, Position, +}; +use dandelion_commons::{records::RecordPoint, DandelionError, DandelionResult}; +use libc::c_void; +use log::{debug, error}; +use serde::{Deserialize, Serialize}; +use std::{ + collections::HashMap, + 
io::{BufRead, BufReader, Write}, + process::{Child, ChildStdin, ChildStdout, Command, Stdio}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + thread::spawn, +}; + +#[cfg(feature = "gpu_process")] +use nix::{ + fcntl::OFlag, + sys::{ + mman::{mmap, shm_open, MapFlags, ProtFlags}, + stat::{fstat, Mode}, + }, +}; + +pub fn get_data_length(ident: &str, context: &Context) -> DandelionResult { + let dataset = context + .content + .iter() + .find(|&elem| match elem { + Some(set) => set.ident == ident, + _ => false, + }) + .ok_or(DandelionError::UndeclaredIdentifier(ident.to_owned()))? + .as_ref() + .unwrap(); // okay, as we matched successfully + + let length = dataset + .buffers + .iter() + .fold(0usize, |acc, item| acc + item.data.size); + + Ok(length) +} + +pub fn copy_data_to_device( + ident: &str, + context: &Context, + dev_ptr: &DevicePointer, +) -> DandelionResult<()> { + let dataset = context + .content + .iter() + .find(|&elem| match elem { + Some(set) => set.ident == ident, + _ => false, + }) + .ok_or(DandelionError::UndeclaredIdentifier(ident.to_owned()))? 
+ .as_ref() + .unwrap(); // okay, as we matched successfully + + let mut total = 0isize; + for item in &dataset.buffers { + let size = item.data.size; + let offset = item.data.offset; + let src = context.get_chunk_ref(offset, size).unwrap().as_ptr() as *const c_void; + gpu_api::memcpy_h_to_d(dev_ptr, total, src, size)?; + total += size as isize; + } + Ok(()) +} + +pub fn write_gpu_outputs( + output_context: &mut Context, + output_set_names: &[String], + buffer_pool: &BufferPool, + #[cfg(feature = "auto_batching")] batch_size: usize, +) -> DandelionResult<()> { + let base = match &output_context.context { + ContextType::Gpu(ref mmu_context) => mmu_context.storage.as_ptr(), + #[cfg(feature = "gpu_process")] + ContextType::GpuProcess(ref gpu_process_context) => gpu_process_context.as_ptr(), + _ => return Err(DandelionError::ConfigMissmatch), + }; + + let mut output_sets = vec![]; + + #[cfg(not(feature = "auto_batching"))] + { + let mut buffers = vec![]; + + for output_name in output_set_names { + let dev_ptr = buffer_pool.get_pointer(output_name)?; + let size = buffer_pool.get_size(output_name)?; + + let buf_offset = output_context.get_free_space(size, 8)?; + let dst = unsafe { base.byte_offset(buf_offset as isize) } as *const c_void; + + gpu_api::memcpy_d_to_h(dst, &dev_ptr, size)?; + + buffers.push(DataItem { + ident: output_name.clone(), + data: Position { + offset: buf_offset, + size: size, + }, + key: 0u32, + }); + output_context.occupy_space(buf_offset, size)?; + } + + output_sets.push(Some(DataSet { + ident: "outputs".to_string(), + buffers: buffers, + })); + } + + #[cfg(feature = "auto_batching")] + for i in 0..batch_size { + let mut buffers = vec![]; + + for output_name in output_set_names { + // Output-related variables: + let dev_ptr = buffer_pool.get_pointer(output_name)?; + let size_batch = buffer_pool.get_size(output_name)?; + let size_single = size_batch / batch_size; + + // Batch_idx-related variables: + let buf_offset = 
output_context.get_free_space(size_single, 8)?; + let dst = unsafe { base.byte_offset(buf_offset as isize) } as *const c_void; + let dev_ptr_idx = DevicePointer { + ptr: unsafe { (dev_ptr.ptr as *const u8).add(size_single) } as *const c_void, + }; + + gpu_api::memcpy_d_to_h(dst, &dev_ptr_idx, size_single)?; + + buffers.push(DataItem { + ident: output_name.clone(), + data: Position { + offset: buf_offset, + size: size_single, + }, + key: 0u32, + }); + output_context.occupy_space(buf_offset, size_single)?; + } + + let outputs_name_idx = format!("outputs{}", i); + output_sets.push(Some(DataSet { + ident: outputs_name_idx, + buffers: buffers, + })); + } + + output_context.content = output_sets; + + Ok(()) +} + +pub fn get_size( + sizing: &Sizing, + buffer_pool: &BufferPool, + context: &Context, + #[cfg(feature = "auto_batching")] batch_size: usize, +) -> DandelionResult { + match sizing { + Sizing::Absolute(size) => Ok(*size), + #[cfg(feature = "auto_batching")] + Sizing::AbsoluteByBatch(size) => Ok(*size * batch_size as u64), + #[cfg(feature = "auto_batching")] + Sizing::AbsoluteByBatchEven(size) => { + let mut tmp = if batch_size % 2 == 0 { + batch_size + } else { + batch_size + 1 + }; + tmp /= 2; + Ok(*size * tmp as u64) + } + Sizing::FromInput { bufname, idx } => { + let dataset = context + .content + .iter() + .find(|&elem| match elem { + Some(set) => &set.ident == bufname, + _ => false, + }) + .ok_or(DandelionError::UndeclaredIdentifier(bufname.to_owned()))? + .as_ref() + .unwrap(); // okay, as we matched successfully + + let data_item = dataset + .buffers + .first() + .ok_or(DandelionError::FromInputOutOfBounds)?; + + let relative_offset = *idx * std::mem::size_of::(); + if relative_offset > data_item.data.size { + return Err(DandelionError::FromInputOutOfBounds); + } + + let mut buf: [u64; 1] = [0]; + context.read(data_item.data.offset + relative_offset, &mut buf)?; + + Ok(buf[0]) + } + Sizing::Sizeof(bufname) => Ok(buffer_pool.get_size(bufname)? 
as u64), + } +} + +struct Worker { + process: Child, + pub stdin: ChildStdin, + pub stdout: BufReader, + // pub available: Notify, + // pub debt: Mutex>, +} + +impl Worker { + fn new(core_id: u8, gpu_id: u8, worker_count: u8) -> Self { + // this trick gives the desired path of mmu_worker for packages within the workspace + // Note: the gpu_worker binary required is assumed to be present (look at README.md) + let path = std::env::var("PROCESS_WORKER_PATH").unwrap_or(format!( + "{}/../target/{}-unknown-linux-gnu/{}/gpu_worker", + env!("CARGO_MANIFEST_DIR"), + std::env::consts::ARCH, + if cfg!(debug_assertions) { + "debug" + } else { + "release" + }, + )); + + let mut child = Command::new(path) + .arg(core_id.to_string()) + .arg(gpu_id.to_string()) + .arg(worker_count.to_string()) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .spawn() + .expect("Spawning GPU worker failed"); + + // Unwrapping okay, since child is spawned with piped stdin/stdout + let stdin = child.stdin.take().unwrap(); + let stdout = BufReader::new(child.stdout.take().unwrap()); + + Self { + process: child, + stdin, + stdout, + } + } +} + +impl Drop for Worker { + fn drop(&mut self) { + if let Err(e) = self.process.kill() { + error!("Killing Worker process gave: {}", e); + } + } +} + +// slightly modified Context that can be exchanged between processes +#[derive(Serialize, Deserialize, Debug)] +pub struct SendContext { + pub context_filename: String, + pub content: Vec>, + pub offset: i64, + pub size: usize, + pub state: ContextState, + pub occupation: Vec, +} + +#[cfg(feature = "gpu_process")] +impl TryFrom for Context { + type Error = DandelionError; + + fn try_from(value: SendContext) -> Result { + let filename = &value.context_filename; + let shmem_fd = match shm_open(filename.as_str(), OFlag::O_RDWR, Mode::S_IRUSR) { + Err(err) => { + error!("Error opening shared memory file: {}:{}", err, err.desc()); + return Err(DandelionError::FileError); + } + Ok(fd) => fd, 
+ }; + + let size = match fstat(shmem_fd) { + Err(err) => { + error!("Error getting file stats: {}:{}", err, err.desc()); + return Err(DandelionError::FileError); + } + Ok(stat) => stat.st_size as usize, + }; + + let ptr = unsafe { + match mmap( + None, + NonZeroUsize::new(size).unwrap(), + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_SHARED, + shmem_fd, + value.offset, + ) { + Err(err) => { + eprintln!( + "Error mapping memory from file {} with size {} and offset {}: {}:{}", + filename, + size, + value.offset, + err, + err.desc() + ); + return Err(DandelionError::MemoryAllocationError); + } + Ok(ptr) => ptr as *mut _, + } + }; + + Ok(Self { + context: ContextType::GpuProcess(Box::new(GpuProcessContext { ptr, size })), + content: value.content, + size: value.size, + state: value.state, + occupation: value.occupation, + }) + } +} + +impl TryFrom<&Context> for SendContext { + type Error = DandelionError; + fn try_from(value: &Context) -> DandelionResult { + let ContextType::Gpu(ref ctxt) = value.context else { + return Err(DandelionError::ConfigMissmatch); + }; + + // Cloning so much on the hot path is not optimal; a custom serialisation function would be quicker + Ok(SendContext { + // unwrap okay, as Mmu memory is always created as shared so a filename exists + context_filename: ctxt.storage.filename().unwrap().to_string(), + content: value.content.clone(), + offset: ctxt.storage.offset(), + size: ctxt.storage.size(), + state: value.state.clone(), + occupation: value.occupation.clone(), + }) + } +} + +#[derive(Serialize, Deserialize)] +pub struct SendFunctionArgs { + pub config: GpuConfig, + pub context: SendContext, + pub output_sets: Arc>, +} + +/* +fn manage_worker( + resources: (u8, u8, u8), + core_id: u8, + gpu_id: u8, + queue: Arc, + done: Arc, +) { + // set core affinity + if !core_affinity::set_for_current(core_affinity::CoreId { id: core_id.into() }) { + log::error!("core received core id that could not be set"); + return; + } + let mut 
worker = Worker::new(core_id + 1, gpu_id, resources.2); + let mut line = String::new(); + + loop { + // A different worker thread got the shutdown signal + if done.load(Ordering::SeqCst) { + return; + } + + let (args, debt) = queue.get_engine_args(); + match args { + WorkToDo::FunctionArguments { + config, + mut context, + output_sets, + mut recorder, + } => { + // transform relevant data into serialisable counterparts + let FunctionConfig::GpuConfig(config) = config else { + debt.fulfill(Err(DandelionError::ConfigMissmatch)); + return; + }; + let Ok(send_context) = (&context).try_into() else { + debt.fulfill(Err(DandelionError::ConfigMissmatch)); + return; + }; + + let mut task = serde_json::to_string(&SendFunctionArgs { + config, + context: send_context, + output_sets, + }) + .unwrap(); + + // Very important to add this newline, as the worker reads line by line + task += "\n"; + + recorder.record(RecordPoint::EngineStart); + + // Write task description to worker process stdin + worker + .stdin + .write_all(task.as_bytes()) + .expect("Writing failed"); + + loop { + line.clear(); + if worker.stdout.read_line(&mut line).unwrap() == 0 { + panic!("Reading Worker output gave EOF!") + } + if !line.trim().starts_with("__ERROR__") && line.trim() != "__OK__" { + // The line is some other output from the GPU, log it + debug!("GPU output: {}", line); + continue; + } + + recorder.record(RecordPoint::EngineEnd); + if line.trim().starts_with("__ERROR__") { + error!("GPU error: {}", line); + println!("GPU error: {}", line); + debt.fulfill(Err(DandelionError::EngineError)); + break; + } else { + read_output_structs::(&mut context, 0).unwrap(); + debt.fulfill(Ok(WorkDone::Context(context))); + break; + } + } + } + WorkToDo::TransferArguments { + source, + mut destination, + destination_set_index, + destination_allignment, + destination_item_index, + destination_set_name, + source_set_index, + source_item_index, + mut recorder, + } => { + 
recorder.record(RecordPoint::TransferStart); + let transfer_result = memory_domain::transfer_data_item( + &mut destination, + source, + destination_set_index, + destination_allignment, + destination_item_index, + destination_set_name.as_str(), + source_set_index, + source_item_index, + ); + recorder.record(RecordPoint::TransferEnd); + let transfer_return = transfer_result.and(Ok(WorkDone::Context(destination))); + debt.fulfill(transfer_return); + continue; + } + WorkToDo::ParsingArguments { + driver, + path, + static_domain, + mut recorder, + } => { + recorder.record(RecordPoint::ParsingStart); + let function_result = driver.parse_function(path, &static_domain); + recorder.record(RecordPoint::ParsingEnd); + match function_result { + Ok(function) => debt.fulfill(Ok(WorkDone::Function(function))), + Err(err) => debt.fulfill(Err(err)), + } + continue; + } + WorkToDo::LoadingArguments { + function, + domain, + ctx_size, + mut recorder, + } => { + recorder.record(RecordPoint::LoadStart); + let load_result = function.load(&domain, ctx_size); + recorder.record(RecordPoint::LoadEnd); + match load_result { + Ok(context) => debt.fulfill(Ok(WorkDone::Context(context))), + Err(err) => debt.fulfill(Err(err)), + } + continue; + } + WorkToDo::Shutdown() => { + // Return original resources that were given to Engine + debt.fulfill(Ok(WorkDone::Resources(vec![ComputeResource::GPU( + resources.0, + resources.1, + resources.2, + )]))); + + // Inform other threads to shutdown as well when they are done + done.swap(true, Ordering::SeqCst); + return; + } + } + } +} + +pub fn start_gpu_process_pool( + core_id: u8, + gpu_id: u8, + worker_count: u8, + queue: Box, +) { + let done = Arc::new(AtomicBool::new(false)); + let queue: Arc = queue.into(); + for offset in 0..worker_count { + let queue = queue.clone(); + let done = done.clone(); + spawn(move || { + manage_worker( + (core_id, gpu_id, worker_count), + core_id + 2 * offset, + gpu_id, + queue, + done, + ) + }); + } +} +*/ diff --git 
/// Thin wrapper around a raw device address.
///
/// Should be associated with a `DeviceAllocation`; nothing ties the two
/// together yet (lifetimes may be used for this in the future), so the wrapper
/// itself does not keep the memory it points to alive.
#[repr(C)] // We take raw pointers to this struct, so make sure the layout is as expected
pub struct DevicePointer {
    /// Raw address; public so callers can take its address when preparing args.
    pub ptr: *const c_void,
}
hipModuleGetFunction( + function: *mut _FunctionT, + module: _ModuleT, + kname: *const i8, + ) -> ErrorT; + fn hipModuleLaunchKernel( + function: _FunctionT, + grid_dim_x: u32, + grid_dim_y: u32, + grid_dim_z: u32, + block_dim_x: u32, + block_dim_y: u32, + block_dim_z: u32, + shared_mem_bytes: u32, + stream: StreamT, + kernel_params: *const *const c_void, + extra: *const *const c_void, + ) -> ErrorT; + fn hipGetErrorString(hipError: ErrorT) -> *const i8; + fn hipMalloc(ptr: *mut *const c_void, size: size_t) -> ErrorT; + fn hipFree(ptr: *const c_void) -> ErrorT; + fn hipMemcpyHtoD(dst: *const c_void, src: *const c_void, sizeBytes: size_t) -> ErrorT; + fn hipMemcpyDtoH(dst: *const c_void, src: *const c_void, sizeBytes: size_t) -> ErrorT; + fn hipMemset(dst: *const c_void, value: i32, sizeBytes: size_t) -> ErrorT; +} + +fn get_error_string(hip_error: ErrorT) -> String { + unsafe { + CStr::from_ptr(hipGetErrorString(hip_error) as *mut i8) + .to_str() + .expect("Invalid ROCm error string (shouldn't happen)") + .to_string() + } +} + +macro_rules! 
checked_call { + ($fcall: expr) => { + unsafe { + let error = $fcall; + if error != 0 { + return Err(DandelionError::HipError(get_error_string(error))); + } + } + }; +} + +pub fn set_device(gpu_id: u8) -> DandelionResult<()> { + checked_call!(hipSetDevice(gpu_id as i32)); + Ok(()) +} + +pub fn get_device_count() -> DandelionResult { + let mut ret: i32 = -1; + checked_call!(hipGetDeviceCount(&mut ret as *const i32)); + + ret.try_into() + .map_err(|_| DandelionError::EngineResourceError) +} + +pub fn get_device() -> DandelionResult { + let mut ret: i32 = 0; + checked_call!(hipGetDevice(&mut ret as *const i32)); + + ret.try_into() + .map_err(|_| DandelionError::EngineResourceError) +} + +pub fn device_synchronize() -> DandelionResult<()> { + checked_call!(hipDeviceSynchronize()); + Ok(()) +} + +pub fn limit_heap_size(size: usize) -> DandelionResult<()> { + // hipLimitMallocHeapSize = 2 + checked_call!(hipDeviceSetLimit(2, size)); + Ok(()) +} + +pub fn module_load(path: &str) -> DandelionResult { + let mut ret: _ModuleT = null(); + let fname = + CString::new(path).or(Err(DandelionError::HipError("Invalid Module Path".into())))?; + checked_call!(hipModuleLoad(&mut ret as *mut _ModuleT, fname.as_ptr())); + Ok(Module(ret)) +} + +/// # Safety +/// Requires *image* to point to a valid hsaco code object +pub fn module_load_data(image: *const c_void) -> DandelionResult { + let mut ret: _ModuleT = null(); + + checked_call!(hipModuleLoadData(&mut ret as *mut _ModuleT, image)); + Ok(Module(ret)) +} + +pub fn module_get_function(module: &Module, name: &str) -> DandelionResult { + let mut ret: _FunctionT = null(); + let kname = CString::new(name).or(Err(DandelionError::HipError("Invalid Name".into())))?; + checked_call!(hipModuleGetFunction( + &mut ret as *mut _FunctionT, + module.0, + kname.as_ptr() + )); + Ok(Function(ret)) +} + +/// # Safety +/// Requires *kernel_params* to point to an array of valid pointers to kernel arguments +#[allow(clippy::too_many_arguments)] +pub fn 
module_launch_kernel( + function: &Function, + grid_dim_x: u32, + grid_dim_y: u32, + grid_dim_z: u32, + block_dim_x: u32, + block_dim_y: u32, + block_dim_z: u32, + shared_mem_bytes: u32, + stream: StreamT, + kernel_params: *const *const c_void, + extra: *const *const c_void, +) -> DandelionResult<()> { + checked_call!(hipModuleLaunchKernel( + function.0, + grid_dim_x, + grid_dim_y, + grid_dim_z, + block_dim_x, + block_dim_y, + block_dim_z, + shared_mem_bytes as u32, + stream, + kernel_params, + extra, + )); + Ok(()) +} + +impl Drop for Module { + fn drop(&mut self) { + unsafe { + if hipModuleUnload(self.0) != 0 { + panic!("Unloading module failed"); + } + } + } +} + +pub fn malloc(ptr: &mut *const c_void, size: size_t) -> ErrorT { + unsafe { hipMalloc(ptr as *mut *const c_void, size) } +} + +impl DeviceAllocation { + pub fn try_new(size: usize) -> DandelionResult { + let mut ret: *const c_void = null(); + checked_call!(hipMalloc(&mut ret as *mut *const c_void, size)); + // zero out memory + checked_call!(hipMemset(ret, 0, size)); + + let device = get_device()?; + Ok(Self { + ptr: ret, + size, + device, + }) + } + + pub fn zero_out(&mut self) -> DandelionResult<()> { + checked_call!(hipMemset(self.ptr, 0, self.size)); + Ok(()) + } + + pub fn zero_size(&mut self, size: usize) -> DandelionResult<()> { + checked_call!(hipMemset(self.ptr, 0, size)); + Ok(()) + } +} + +impl Drop for DeviceAllocation { + fn drop(&mut self) { + // Not entirely sure if this is required but device allocations are freed off the hot path anyway + let curr_dev = get_device().expect("Need to be able to get current device before freeing"); + set_device(self.device).expect("Need to be able to set device before freeing"); + unsafe { + if hipFree(self.ptr) != 0 { + panic!("Freeing a device pointer failed (this shouldn't happen)"); + } + } + set_device(curr_dev).expect("Need to be able to restore device after freeing"); + } +} + +/// # Safety +/// Requires *src* to point to valid memory +pub fn 
/// Copies `size_bytes` bytes from the device address in `src` to the host
/// address `dst` via `hipMemcpyDtoH`.
///
/// # Errors
/// `DandelionError::HipError` when the runtime returns a non-zero status.
///
/// # Safety
/// Requires *dst* to point to valid memory
// NOTE(review): the function is not marked `unsafe`, so the requirement above
// is not enforced by the compiler. `dst` is written to although it is typed
// `*const`; presumably it must be valid for `size_bytes` bytes - confirm with callers.
pub fn memcpy_d_to_h(
    dst: *const c_void,
    src: &DevicePointer,
    size_bytes: usize,
) -> DandelionResult<()> {
    checked_call!(hipMemcpyDtoH(dst, src.ptr, size_bytes));
    Ok(())
}
WasmDriver { fn start_engine( &self, resource: ComputeResource, - queue: Box, + queue: Box, ) -> DandelionResult<()> { // sanity checks; extract core id let cpu_slot = match resource { diff --git a/machine_interface/src/function_driver/load_utils.rs b/machine_interface/src/function_driver/load_utils.rs index 6d7ed38c..c7348211 100644 --- a/machine_interface/src/function_driver/load_utils.rs +++ b/machine_interface/src/function_driver/load_utils.rs @@ -5,7 +5,7 @@ use crate::{ use dandelion_commons::{DandelionError, DandelionResult}; use std::sync::Arc; -#[cfg(any(feature = "cheri", feature = "mmu", feature = "kvm"))] +#[cfg(any(feature = "cheri", feature = "mmu", feature = "kvm", feature = "gpu"))] pub fn load_u8_from_file(full_path: String) -> DandelionResult> { let mut file = match std::fs::File::open(full_path) { Ok(f) => f, diff --git a/machine_interface/src/function_driver/system_driver/reqwest.rs b/machine_interface/src/function_driver/system_driver/reqwest.rs index 748d9c50..43834d32 100644 --- a/machine_interface/src/function_driver/system_driver/reqwest.rs +++ b/machine_interface/src/function_driver/system_driver/reqwest.rs @@ -817,7 +817,7 @@ impl Driver for ReqwestDriver { fn start_engine( &self, resource: ComputeResource, - queue: Box, + queue: Box, ) -> DandelionResult<()> { log::debug!("Starting hyper engine"); let core_id = match resource { diff --git a/machine_interface/src/function_driver/system_driver/system_driver_tests.rs b/machine_interface/src/function_driver/system_driver/system_driver_tests.rs index 1a67dbae..5ac617ff 100644 --- a/machine_interface/src/function_driver/system_driver/system_driver_tests.rs +++ b/machine_interface/src/function_driver/system_driver/system_driver_tests.rs @@ -1,4 +1,5 @@ #[cfg(all(test, any(feature = "reqwest_io")))] +#[allow(clippy::module_inception)] mod system_driver_tests { use crate::{ function_driver::{ diff --git a/machine_interface/src/function_driver/thread_utils.rs 
b/machine_interface/src/function_driver/thread_utils.rs index 96dc6da8..9c6bee71 100644 --- a/machine_interface/src/function_driver/thread_utils.rs +++ b/machine_interface/src/function_driver/thread_utils.rs @@ -3,28 +3,44 @@ use crate::{ memory_domain::{self, Context}, }; use core::marker::Send; -use dandelion_commons::{records::RecordPoint, DandelionResult}; +use dandelion_commons::{ + records::{RecordPoint, Recorder}, + DandelionResult, +}; use std::thread::spawn; +#[cfg(feature = "auto_batching")] +use crate::function_driver::BatchInfo; + extern crate alloc; pub trait EngineLoop { - fn init(core_id: u8) -> DandelionResult>; + fn init(resource: ComputeResource) -> DandelionResult>; fn run( &mut self, config: FunctionConfig, context: Context, output_sets: std::sync::Arc>, + recorder: Recorder, ) -> DandelionResult; } -fn run_thread(core_id: u8, queue: Box) { +pub fn run_thread( + initialisation_resource: ComputeResource, + queue: Box, +) { + // get CPU from resource + let core_id = match initialisation_resource { + ComputeResource::CPU(id) => id, + ComputeResource::GPU(id, _, _) => id, + }; // set core affinity if !core_affinity::set_for_current(core_affinity::CoreId { id: core_id.into() }) { log::error!("core received core id that could not be set"); return; } - let mut engine_state = E::init(core_id).expect("Failed to initialize thread state"); + let mut engine_state = + E::init(initialisation_resource).expect("Failed to initialize thread state"); loop { // TODO catch unwind so we can always return an error or shut down gracefully let (args, debt) = queue.get_engine_args(); @@ -37,7 +53,8 @@ fn run_thread(core_id: u8, queue: Box) { } => { recorder.record(RecordPoint::EngineStart); - let result = engine_state.run(config, context, output_sets); + let subrecorder = recorder.get_sub_recorder(); + let result = engine_state.run(config, context, output_sets, subrecorder); recorder.record(RecordPoint::EngineEnd); drop(recorder); @@ -108,6 +125,24 @@ fn 
run_thread(core_id: u8, queue: Box) { } continue; } + #[cfg(feature = "auto_batching")] + WorkToDo::BatchAtom { + function_id, + inputs, + recorder, + inputs_vec, + children_debts, + gpu_id, + } => { + debt.fulfill(Ok(WorkDone::SharedContext(BatchInfo { + batch_pos: 0, + inputs_vec, + context_arc: None, + children_debts, + gpu_id, + }))); + continue; + } WorkToDo::Shutdown() => { debt.fulfill(Ok(WorkDone::Resources(vec![ComputeResource::CPU(core_id)]))); return; @@ -117,5 +152,5 @@ fn run_thread(core_id: u8, queue: Box) { } pub fn start_thread(cpu_slot: u8, queue: Box) -> () { - spawn(move || run_thread::(cpu_slot, queue)); + spawn(move || run_thread::(ComputeResource::CPU(cpu_slot), queue)); } diff --git a/machine_interface/src/interface.rs b/machine_interface/src/interface.rs index bd62b219..96b3645f 100644 --- a/machine_interface/src/interface.rs +++ b/machine_interface/src/interface.rs @@ -6,6 +6,7 @@ use dandelion_commons::{DandelionError, DandelionResult}; use libc::{c_int, size_t, uintptr_t}; use log::trace; extern crate alloc; +use std::fmt::Debug; pub trait SizedIntTrait where @@ -15,24 +16,28 @@ where fn to_native(self) -> DandelionResult; } +#[macro_export] /// macro to convert usize to SizeT macro_rules! size_t { ($val:expr) => { SizeT::from_native($val)? }; } +#[macro_export] /// macro to convert SizeT to usize macro_rules! usize { ($val:expr) => { SizeT::to_native($val)? }; } +#[macro_export] /// macro to convert usize to PtrT macro_rules! ptr_t { ($val:expr) => { PtrT::from_native($val)? }; } +#[macro_export] /// macro to convert PtrT to usize macro_rules! 
usize_ptr { ($val:expr) => { @@ -91,33 +96,33 @@ pub mod _native { #[derive(Debug, Clone, Default)] #[repr(C)] pub struct DandelionSystemData { - exit_code: c_int, - heap_begin: PtrT, // uintptr_t, - heap_end: PtrT, // uintptr_t, - input_sets_len: SizeT, // size_t, - input_sets: PtrT, // *const IoSetInfo, - output_sets_len: SizeT, // size_t, - output_sets: PtrT, // *const IoSetInfo, - input_bufs: PtrT, // *const IoBufferDescriptor, - output_bufs: PtrT, // *const IoBufferDescriptor, + pub exit_code: c_int, + pub heap_begin: PtrT, // uintptr_t, + pub heap_end: PtrT, // uintptr_t, + pub input_sets_len: SizeT, // size_t, + pub input_sets: PtrT, // *const IoSetInfo, + pub output_sets_len: SizeT, // size_t, + pub output_sets: PtrT, // *const IoSetInfo, + pub input_bufs: PtrT, // *const IoBufferDescriptor, + pub output_bufs: PtrT, // *const IoBufferDescriptor, } -#[derive(Clone)] +#[derive(Clone, Debug)] #[repr(C)] -struct IoSetInfo { - ident: PtrT, // uintptr_t, - ident_len: SizeT, // size_t, - offset: SizeT, // size_t, +pub struct IoSetInfo { + pub ident: PtrT, // uintptr_t, + pub ident_len: SizeT, // size_t, + pub offset: SizeT, // size_t, } #[derive(Clone)] #[repr(C)] -struct IoBufferDescriptor { - ident: PtrT, // uintptr_t, - ident_len: SizeT, // size_t, - data: PtrT, // uintptr_t, - data_len: SizeT, // size_t, - key: SizeT, // size_t, +pub struct IoBufferDescriptor { + pub ident: PtrT, // uintptr_t, + pub ident_len: SizeT, // size_t, + pub data: PtrT, // uintptr_t, + pub data_len: SizeT, // size_t, + pub key: SizeT, // size_t, } pub fn setup_input_structs( diff --git a/machine_interface/src/lib.rs b/machine_interface/src/lib.rs index 15ba54fd..eb7c51d1 100644 --- a/machine_interface/src/lib.rs +++ b/machine_interface/src/lib.rs @@ -6,7 +6,14 @@ pub mod promise; /// contexts, and their compatibility with fast to look up structs pub mod machine_config; -#[cfg(any(feature = "cheri", feature = "mmu", feature = "kvm", feature = "wasm"))] +#[cfg(any( + feature = "cheri", 
+ feature = "mmu", + feature = "wasm", + feature = "kvm", + feature = "gpu", + feature = "reqwest" +))] mod interface; pub mod util; @@ -44,13 +51,13 @@ pub struct Position { pub size: usize, } -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize, Clone)] pub struct DataSet { pub ident: String, pub buffers: Vec, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct DataItem { pub ident: String, pub data: Position, diff --git a/machine_interface/src/machine_config.rs b/machine_interface/src/machine_config.rs index 576d6f6d..b1fac4b1 100644 --- a/machine_interface/src/machine_config.rs +++ b/machine_interface/src/machine_config.rs @@ -18,6 +18,10 @@ pub enum EngineType { RWasm, #[cfg(feature = "mmu")] Process, + #[cfg(feature = "gpu_thread")] + GpuThread, + #[cfg(feature = "gpu_process")] + GpuProcess, #[cfg(feature = "kvm")] Kvm, } @@ -32,6 +36,8 @@ pub enum DomainType { RWasm, #[cfg(feature = "mmu")] Process, + #[cfg(feature = "gpu")] + Gpu, } pub fn get_compatibilty_table() -> BTreeMap { @@ -44,6 +50,10 @@ pub fn get_compatibilty_table() -> BTreeMap { (EngineType::RWasm, DomainType::RWasm), #[cfg(feature = "mmu")] (EngineType::Process, DomainType::Process), + #[cfg(feature = "gpu_thread")] + (EngineType::GpuThread, DomainType::Gpu), + #[cfg(feature = "gpu_process")] + (EngineType::GpuProcess, DomainType::Gpu), #[cfg(feature = "kvm")] (EngineType::Kvm, DomainType::Mmap), ]); @@ -77,6 +87,8 @@ pub fn get_available_domains( size: 0, }, ), + #[cfg(feature = "gpu")] + (DomainType::Gpu, MemoryResource::Shared { id: u64::MAX, size: 0 }), #[cfg(feature = "wasm")] (DomainType::RWasm, MemoryResource::Anonymous { size: 0 }), ]); @@ -107,6 +119,11 @@ pub fn get_available_domains( dom_type, Arc::new(crate::memory_domain::mmu::MmuMemoryDomain::init(resource).unwrap()), ), + #[cfg(feature = "gpu")] + DomainType::Gpu => ( + dom_type, + Arc::new(crate::memory_domain::gpu::GpuMemoryDomain::init(resource).unwrap()), + ), #[cfg(feature = "wasm")] 
DomainType::RWasm => ( dom_type, @@ -146,6 +163,20 @@ pub fn get_available_drivers() -> BTreeMap { crate::function_driver::compute_driver::mmu::MmuDriver {}, )) as &'static dyn Driver, ), + #[cfg(feature = "gpu_thread")] + ( + EngineType::GpuThread, + Box::leak(Box::new( + crate::function_driver::compute_driver::gpu::GpuThreadDriver {}, + )) as &'static dyn Driver, + ), + #[cfg(feature = "gpu_process")] + ( + EngineType::GpuProcess, + Box::leak(Box::new( + crate::function_driver::compute_driver::gpu::GpuProcessDriver {}, + )) as &'static dyn Driver, + ), #[cfg(feature = "kvm")] ( EngineType::Kvm, diff --git a/machine_interface/src/memory_domain.rs b/machine_interface/src/memory_domain.rs index fa258f8f..a05edc15 100644 --- a/machine_interface/src/memory_domain.rs +++ b/machine_interface/src/memory_domain.rs @@ -3,6 +3,8 @@ pub mod bytes_context; #[cfg(feature = "cheri")] pub mod cheri; +#[cfg(feature = "gpu")] +pub mod gpu; pub mod malloc; pub mod mmap; #[cfg(feature = "mmu")] @@ -14,8 +16,12 @@ pub mod wasm; use crate::{DataItem, DataSet, Position}; use dandelion_commons::{DandelionError, DandelionResult}; +use serde::{Deserialize, Serialize}; use std::sync::Arc; +#[cfg(feature = "gpu")] +use crate::memory_domain::gpu::SubReadOnly; + pub trait ContextTrait: Send + Sync { /// Write data at the given offset into the context /// May fail if the range offset..offset+data lenght in bytes is not completely within the context size @@ -45,6 +51,10 @@ pub enum ContextType { Mmu(Box), #[cfg(feature = "wasm")] Wasm(Box), + #[cfg(feature = "gpu")] + Gpu(Box), + #[cfg(feature = "gpu_process")] + GpuProcess(Box), System(Box), } @@ -60,6 +70,10 @@ impl ContextTrait for ContextType { ContextType::Mmu(context) => context.write(offset, data), #[cfg(feature = "wasm")] ContextType::Wasm(context) => context.write(offset, data), + #[cfg(feature = "gpu")] + ContextType::Gpu(context) => context.write(offset, data), + #[cfg(feature = "gpu_process")] + ContextType::GpuProcess(context) => 
context.write(offset, data), #[cfg(feature = "bytes_context")] ContextType::Bytes(context) => context.write(offset, data), ContextType::System(context) => context.write(offset, data), @@ -76,6 +90,10 @@ impl ContextTrait for ContextType { ContextType::Mmu(context) => context.read(offset, read_buffer), #[cfg(feature = "wasm")] ContextType::Wasm(context) => context.read(offset, read_buffer), + #[cfg(feature = "gpu")] + ContextType::Gpu(context) => context.read(offset, read_buffer), + #[cfg(feature = "gpu_process")] + ContextType::GpuProcess(context) => context.read(offset, read_buffer), #[cfg(feature = "bytes_context")] ContextType::Bytes(context) => context.read(offset, read_buffer), ContextType::System(context) => context.read(offset, read_buffer), @@ -92,6 +110,10 @@ impl ContextTrait for ContextType { ContextType::Mmu(context) => context.get_chunk_ref(offset, length), #[cfg(feature = "wasm")] ContextType::Wasm(context) => context.get_chunk_ref(offset, length), + #[cfg(feature = "gpu")] + ContextType::Gpu(context) => context.get_chunk_ref(offset, length), + #[cfg(feature = "gpu_process")] + ContextType::GpuProcess(context) => context.get_chunk_ref(offset, length), #[cfg(feature = "bytes_context")] ContextType::Bytes(context) => context.get_chunk_ref(offset, length), ContextType::System(context) => context.get_chunk_ref(offset, length), @@ -99,7 +121,7 @@ impl ContextTrait for ContextType { } } -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize, Clone)] pub enum ContextState { InPreparation, Run(i32), @@ -111,7 +133,7 @@ pub struct Context { pub content: Vec>, pub size: usize, pub state: ContextState, - occupation: Vec, + pub occupation: Vec, } impl ContextTrait for Context { @@ -262,6 +284,7 @@ pub fn transfer_memory( source_offset: usize, size: usize, ) -> DandelionResult<()> { + #[allow(clippy::needless_return)] return match (&mut destination.context, &source.context) { (ContextType::Malloc(destination_ctxt), ContextType::Malloc(source_ctxt)) => { 
malloc::malloc_transfer( @@ -350,6 +373,40 @@ pub fn transfer_memory( source_offset, size, ), + #[cfg(feature = "gpu")] + (ContextType::Gpu(destination_ctxt), ContextType::ReadOnly(source_ctxt)) => { + // Transfer function registering buffers: weights + .cubin + gpu::read_only_to_gpu_transfer( + destination_ctxt, + source, + destination_offset, + source_offset, + size, + ) + }, + #[cfg(feature = "gpu")] + (ContextType::Gpu(destination_ctxt), ContextType::Gpu(source_ctxt)) => { + // Never called + // Transfer nothing really... + gpu::gpu_transfer( + destination_ctxt, + source_ctxt, + destination_offset, + source_offset, + size, + ) + }, + #[cfg(all(feature = "gpu", feature = "bytes_context"))] + (ContextType::Gpu(destination_ctxt), ContextType::Bytes(source_ctxt)) => { + // Transfer request inputs + gpu::bytes_to_gpu_transfer( + destination_ctxt, + source, + destination_offset, + source_offset, + size, + ) + } // default implementation using reads and writes (destination, source) => { let mut read_buffer: Vec = vec![0; size]; diff --git a/machine_interface/src/memory_domain/gpu.rs b/machine_interface/src/memory_domain/gpu.rs new file mode 100644 index 00000000..4bc6cc47 --- /dev/null +++ b/machine_interface/src/memory_domain/gpu.rs @@ -0,0 +1,341 @@ +use crate::{ + memory_domain::{Context, ContextTrait, ContextType, MemoryDomain, MemoryResource}, + util::mmapmem::{MmapMem, MmapMemPool}, + Position, +}; +use dandelion_commons::{ + DandelionError, + DandelionResult, +}; +use log::{debug, error}; +use nix::sys::mman::ProtFlags; +use std::{ + cmp, + collections::HashMap, + ops::{Deref, DerefMut}, + sync::Arc, +}; + +#[derive(Debug)] +pub struct SubReadOnly { + pub context: Arc, + pub position: Position, +} + +#[derive(Debug)] +pub struct SubBytes { + pub context: Arc, + pub position: Position, +} + +#[derive(Debug)] +pub struct GpuContext { + pub storage: MmapMem, + pub read_only: HashMap, + pub inputs: HashMap, + #[cfg(feature = "auto_batching")] + pub batch_size: 
usize, +} + +impl ContextTrait for GpuContext { + fn write(&mut self, offset: usize, data: &[T]) -> DandelionResult<()> { + self.storage.write(offset, data) + } + + fn read(&self, offset: usize, read_buffer: &mut [T]) -> DandelionResult<()> { + self.storage.read(offset, read_buffer) + } + + fn get_chunk_ref(&self, offset: usize, length: usize) -> DandelionResult<&[u8]> { + self.storage.get_chunk_ref(offset, length) + } +} + +#[derive(Debug)] +pub struct GpuProcessContext { + // pub storage: MmapMem, + pub ptr: *mut u8, + pub size: usize, +} + +impl GpuProcessContext { + fn size(&self) -> usize { + self.size + } + + pub fn as_ptr(&self) -> *mut u8 { + self.ptr + } + + pub unsafe fn as_slice(&self) -> &[u8] { + std::slice::from_raw_parts(self.as_ptr(), self.size()) + } + + pub unsafe fn as_slice_mut(&mut self) -> &mut [u8] { + std::slice::from_raw_parts_mut(self.as_ptr(), self.size()) + } +} + +impl ContextTrait for GpuProcessContext { + fn write(&mut self, offset: usize, data: &[T]) -> DandelionResult<()> { + // check alignment + if offset % core::mem::align_of::() != 0 { + debug!("Misaligned write at offset {}", offset); + return Err(DandelionError::WriteMisaligned); + } + + // check if the write is within bounds + let write_length = data.len() * core::mem::size_of::(); + if offset + write_length > self.size() { + debug!("Write out of bounds at offset {}", offset); + return Err(DandelionError::InvalidWrite); + } + + // write values + unsafe { + let buffer = core::slice::from_raw_parts(data.as_ptr() as *const u8, write_length); + self.as_slice_mut()[offset..offset + buffer.len()].copy_from_slice(&buffer); + } + + Ok(()) + } + + fn read(&self, offset: usize, read_buffer: &mut [T]) -> DandelionResult<()> { + // check that buffer has proper allighment + if offset % core::mem::align_of::() != 0 { + debug!("Misaligned write at offset {}", offset); + return Err(DandelionError::ReadMisaligned); + } + + let read_size = core::mem::size_of::() * read_buffer.len(); + if offset 
+ read_size > self.size() { + eprintln!( + "InvalidRead in MMM: len {}, offset {}, size {}", + self.size(), + offset, + read_size + ); + debug!("Read out of bounds at offset {}", offset); + return Err(DandelionError::InvalidRead); + } + + // read values, sanitize if necessary + unsafe { + let read_memory = + core::slice::from_raw_parts_mut(read_buffer.as_mut_ptr() as *mut u8, read_size); + read_memory.copy_from_slice(&self.as_slice()[offset..offset + read_size]); + } + + Ok(()) + } + + fn get_chunk_ref(&self, offset: usize, length: usize) -> DandelionResult<&[u8]> { + if offset + length > self.size() { + return Err(DandelionError::InvalidRead); + } + return Ok(unsafe { &self.as_slice()[offset..offset + length] }); + } +} + +unsafe impl Send for GpuProcessContext {} +unsafe impl Sync for GpuProcessContext {} + +impl Deref for GpuProcessContext { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + unsafe { self.as_slice() } + } +} + +impl DerefMut for GpuProcessContext { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { self.as_slice_mut() } + } +} + +/*pub const SLAB_SIZE: usize = 1 << 20; +impl Drop for GpuProcessContext { + fn drop(&mut self) { + let start = usize::try_from(unsafe { self.ptr.offset_from(self.origin.ptr) }).unwrap(); + let start_slab = u32::try_from(start / SLAB_SIZE).unwrap(); + let slab_number = u32::try_from(self.size / SLAB_SIZE).unwrap(); + self.origin + .occupation + .lock() + .unwrap() + .insert(start_slab, start_slab + slab_number); + } +}*/ + +#[derive(Debug)] +pub struct GpuMemoryDomain { + memory_pool: MmapMemPool, +} + +impl MemoryDomain for GpuMemoryDomain { + fn init(config: MemoryResource) -> DandelionResult> { + let (id, size) = match config { + MemoryResource::Shared { id, size } => (id, size), + _ => { + return Err(DandelionError::DomainError( + dandelion_commons::DomainError::ConfigMissmatch, + )) + } + }; + let memory_pool = + MmapMemPool::create(size, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, 
Some(id))?; + Ok(Box::new(GpuMemoryDomain { memory_pool })) + } + + fn acquire_context(&self, size: usize) -> DandelionResult { + // create and map a shared memory region + let (mem_space, actual_size) = self + .memory_pool + .get_allocation(size, nix::sys::mman::MmapAdvise::MADV_DONTNEED)?; + + let new_context = Box::new(GpuContext { + storage: mem_space, + read_only: HashMap::new(), + inputs: HashMap::new(), + #[cfg(feature = "auto_batching")] + batch_size: 0, + }); + Ok(Context::new(ContextType::Gpu(new_context), actual_size)) + } +} + +pub fn read_only_to_gpu_transfer( + destination_ctxt: &mut GpuContext, + source: Arc, + destination_offset: usize, + source_offset: usize, + size: usize, +) -> DandelionResult<()> { + let Some(ref data_set) = source.content[0] else { todo!() }; + let ident = &data_set.ident.to_string(); + + #[cfg(feature = "weights_from_disk")] + { + use crate::{DataSet, DataItem}; + use crate::memory_domain::read_only::ReadOnlyContext; + let ContextType::ReadOnly(source_ctxt) = &source.context else { todo!() }; + let disk_path = source_ctxt.disk_path.clone().unwrap(); + let split_path = disk_path.split("/").collect::>(); + let name = split_path[split_path.len() - 1].to_string(); + let data_vec = std::fs::read(&disk_path).unwrap(); + let item_size = data_vec.len(); + let mut new_context = + ReadOnlyContext::new(data_vec.into_boxed_slice()).unwrap(); + new_context.content.push(Some(DataSet { + ident: ident.clone(), + buffers: vec![DataItem { + ident: ident.clone(), + data: Position { + offset: 0, + size: item_size, + }, + key: 0, + }], + })); + let new_source = Arc::new(new_context); + + destination_ctxt.read_only.insert( + ident.clone(), + SubReadOnly { + context: new_source, + position: Position { + offset: source_offset, + size + } + } + ); + } + + #[cfg(not(feature = "weights_from_disk"))] + { + destination_ctxt.read_only.insert( + ident.clone(), + SubReadOnly { + context: source, + position: Position { + offset: source_offset, + size + } + } 
+ ); + } + + Ok(()) +} + +pub fn gpu_transfer( + destination: &mut GpuContext, + source: &GpuContext, + destination_offset: usize, + source_offset: usize, + size: usize, +) -> DandelionResult<()> { + // check if there is space in both contexts + if source.storage.size() < source_offset + size { + error!( + "Out of bounds: storage_size {}, source_offset {}, size {}", + source.storage.size(), + source_offset, + size + ); + return Err(DandelionError::InvalidRead); + } + if destination.storage.size() < destination_offset + size { + return Err(DandelionError::InvalidWrite); + } + unsafe { + destination.storage.as_slice_mut()[destination_offset..destination_offset + size] + .copy_from_slice(&source.storage.as_slice()[source_offset..source_offset + size]); + } + Ok(()) +} + +#[cfg(feature = "bytes_context")] +pub fn bytes_to_gpu_transfer( + destination_ctxt: &mut GpuContext, + source: Arc, + destination_offset: usize, + source_offset: usize, + size: usize, +) -> DandelionResult<()> { + // TODO : generalize to multiple sets + let Some(ref data_set) = source.content[0] else { todo!() }; + + #[cfg(not(feature = "auto_batching"))] + let ident = (&data_set.ident).clone().to_string(); + #[cfg(feature = "auto_batching")] + let ident = { + let mut max_present = -1; + for key in destination_ctxt.inputs.keys() { + let key_split = key.split(&data_set.ident).collect::>(); + if key_split.len() > 1 { + let found_idx = key_split.last(); + if let Some(idx) = found_idx { + max_present = cmp::max(max_present, idx.parse().unwrap()); + } + } + } + + destination_ctxt.batch_size = cmp::max(destination_ctxt.batch_size, (max_present + 2) as usize); + + format!("{}{}", &data_set.ident, (max_present + 1).to_string()) + }; + + destination_ctxt.inputs.insert( + ident, + SubBytes { + context: source, + position: Position { + offset: source_offset, + size + } + } + ); + Ok(()) +} diff --git a/machine_interface/src/memory_domain/mmu.rs b/machine_interface/src/memory_domain/mmu.rs index 
ebc212e4..213a5213 100644 --- a/machine_interface/src/memory_domain/mmu.rs +++ b/machine_interface/src/memory_domain/mmu.rs @@ -3,7 +3,7 @@ use crate::{ util::mmapmem::{MmapMem, MmapMemPool}, }; use dandelion_commons::{DandelionError, DandelionResult}; -use log::debug; +use log::{error, warn, debug}; use nix::sys::mman::ProtFlags; // TODO: decide this value in a system dependent way @@ -80,6 +80,12 @@ pub fn mmu_transfer( ) -> DandelionResult<()> { // check if there is space in both contexts if source.storage.size() < source_offset + size { + error!( + "Out of bounds: storage_size {}, source_offset {}, size {}", + source.storage.size(), + source_offset, + size + ); return Err(DandelionError::InvalidRead); } if destination.storage.size() < destination_offset + size { diff --git a/machine_interface/src/memory_domain/read_only.rs b/machine_interface/src/memory_domain/read_only.rs index 91b42636..cbffbdf2 100644 --- a/machine_interface/src/memory_domain/read_only.rs +++ b/machine_interface/src/memory_domain/read_only.rs @@ -8,6 +8,8 @@ use log::error; pub struct ReadOnlyContext { storage: &'static mut [u8], layout: Option, + #[cfg(feature = "weights_from_disk")] + pub disk_path: Option, } impl ContextTrait for ReadOnlyContext { @@ -23,6 +25,12 @@ impl ContextTrait for ReadOnlyContext { let read_size = core::mem::size_of::() * read_buffer.len(); if offset + read_size > self.storage.len() { + eprintln!( + "Invalid in ROC: len {}, offset {}, read_size {}", + self.storage.len(), + offset, + read_size + ); return Err(DandelionError::InvalidRead); } let byte_buffer = unsafe { @@ -55,10 +63,31 @@ impl ReadOnlyContext { super::ContextType::ReadOnly(Box::new(ReadOnlyContext { storage: new_ref, layout: Some(layout), + #[cfg(feature = "weights_from_disk")] + disk_path: None, })), ref_len, )); } + + #[cfg(feature = "weights_from_disk")] + pub fn new_disk(reference: Box<[T]>, disk_path: &str) -> DandelionResult { + let ref_len = core::mem::size_of::() * reference.len(); + let 
layout = core::alloc::Layout::from_size_align(ref_len, core::mem::align_of::()) + .or(Err(DandelionError::ContextReadOnlyLayout))?; + let new_ref = unsafe { + core::slice::from_raw_parts_mut(Box::leak(reference).as_mut_ptr() as *mut u8, ref_len) + }; + return Ok(Context::new( + super::ContextType::ReadOnly(Box::new(ReadOnlyContext { + storage: new_ref, + layout: Some(layout), + disk_path: Some(disk_path.to_string()), + })), + ref_len, + )); + } + pub fn new_static(reference: &'static mut [T]) -> Context { let ref_len = core::mem::size_of::() * reference.len(); let new_ref = @@ -67,6 +96,8 @@ impl ReadOnlyContext { super::ContextType::ReadOnly(Box::new(ReadOnlyContext { storage: new_ref, layout: None, + #[cfg(feature = "weights_from_disk")] + disk_path: None, })), ref_len, ); diff --git a/machine_interface/src/util/mmapmem.rs b/machine_interface/src/util/mmapmem.rs index 969049f2..0f71ecb4 100644 --- a/machine_interface/src/util/mmapmem.rs +++ b/machine_interface/src/util/mmapmem.rs @@ -246,6 +246,12 @@ impl MmapMem { let read_size = core::mem::size_of::() * read_buffer.len(); if offset + read_size > self.size() { + eprintln!( + "InvalidRead in MMM: len {}, offset {}, size {}", + self.size(), + offset, + read_size + ); debug!("Read out of bounds at offset {}", offset); return Err(DandelionError::InvalidRead); } diff --git a/machine_interface/tests/data/cuda/test_gpu_bert.json b/machine_interface/tests/data/cuda/test_gpu_bert.json new file mode 100644 index 00000000..d97b5b02 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_bert.json @@ -0,0 +1,3423 @@ +{ + "modules": [{"module_name": "bert.cubin", "path": "cuda/bert.cubin"}], + "kernels": [ + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_expand_dims_expand_dims_broadcast_to_cast_subtract_cast_where_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_less_add_where_take_add_kernel"}, + {"module_name": "bert.cubin", "kernel_name": 
"tvmgen_default_fused_mean_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_mean_kernel_1"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_1_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_1_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_2_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_3_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_1"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_2"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_3"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_1_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_add_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel"}, + {"module_name": "bert.cubin", 
"kernel_name": "tvmgen_default_fused_reshape_transpose_reshape_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_variance_kernel"}, + {"module_name": "bert.cubin", "kernel_name": "tvmgen_default_fused_variance_kernel_1"} + ], + "blueprint": { + "inputs": ["input_ids", "attention_mask", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", 
"p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209", "p210", "p211", "p212", "p213", "p214", "p215", "p216", "p217", "p218", "p219", "p220", "p221", "p222", "p223", "p224", "p225"], + "buffers": {"output": {"Absolute": 15627264}, "b4": {"Absolute": 15627264}, "b5": {"Absolute": 512}, "b6": {"Absolute": 512}, "b9": {"Absolute": 786432}, "b13": {"Absolute": 393216}, "b20": {"Absolute": 393216}, "b235": {"Absolute": 8192}, "b236": {"Absolute": 786432}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_less_add_where_take_add_kernel", [{"Ptr": "b4"}, {"Ptr": "input_ids"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p2"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "b13"}, {"Ptr": "b4"}, {"Ptr": "p5"}, {"Ptr": "p6"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b4"}, {"Ptr": "b13"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_expand_dims_expand_dims_broadcast_to_cast_subtract_cast_where_kernel", [{"Ptr": "b13"}, {"Ptr": "attention_mask"}], { + "grid_dim_x": {"Absolute": 16}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, 
{"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p10"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b20"}, {"Ptr": "output"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": 
{"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b20"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "p13"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b20"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": 
"b20"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p14"}, {"Ptr": "p15"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b20"}, {"Ptr": "output"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": 
{"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p19"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p20"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p22"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p26"}, {"Ptr": "p27"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p28"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p30"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p31"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p32"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p34"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p35"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p36"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p37"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p38"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p40"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", 
[{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p41"}, {"Ptr": "p42"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p44"}, {"Ptr": "p45"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p46"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p47"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p48"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p49"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p50"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p52"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p53"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p54"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p55"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p56"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p58"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p59"}, {"Ptr": "p60"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p62"}, {"Ptr": "p63"}], { + 
"grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p64"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, 
{"Ptr": "p66"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p67"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p68"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p71"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p72"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p73"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p74"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p76"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p77"}, {"Ptr": "p78"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p80"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p83"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p84"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p85"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p86"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p88"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p90"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p91"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": 
{"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p92"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p94"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p95"}, {"Ptr": "p96"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p97"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 
1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p98"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + 
"grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p102"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p103"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p104"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p106"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p108"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p109"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p110"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p112"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p113"}, {"Ptr": "p114"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p115"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p116"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, 
+ "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": 
"b20"}, {"Ptr": "p118"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p120"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p121"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p122"}, {"Ptr": "p123"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p124"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p126"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p127"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p128"}, {"Ptr": "p129"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p130"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p131"}, {"Ptr": "p132"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p133"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p134"}, {"Ptr": "p135"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p136"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, 
+ "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p138"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p139"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p140"}, {"Ptr": "p141"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p142"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p143"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, 
{"Ptr": "output"}, {"Ptr": "p144"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p145"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p146"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p148"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p149"}, {"Ptr": "p150"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p151"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p152"}, {"Ptr": "p153"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + 
"grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p154"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p156"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p157"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, 
{"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p158"}, {"Ptr": "p159"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p160"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p161"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p162"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p163"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p164"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p166"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p167"}, {"Ptr": "p168"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p169"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p170"}, {"Ptr": "p171"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p172"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p173"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p174"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p175"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p176"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p178"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p179"}], { 
+ "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p180"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p181"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p182"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p184"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p185"}, {"Ptr": "p186"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p187"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p188"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p190"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p191"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p192"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p193"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p194"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b20"}, {"Ptr": "p196"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p197"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "p198"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p199"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b4"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p200"}, {"Ptr": "p201"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, 
+ {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p202"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p203"}, {"Ptr": "p204"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p205"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p206"}, {"Ptr": "p207"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b9"}, {"Ptr": "output"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": 
"output"}, {"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b236"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b236"}, {"Ptr": "b235"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b13"}, {"Ptr": "b20"}, {"Ptr": "p208"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b4"}, {"Ptr": "b13"}, {"Ptr": "p209"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b13"}, {"Ptr": "b9"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b9"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b13"}, {"Ptr": "b9"}, {"Ptr": "p210"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "b13"}, {"Ptr": "p211"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_kernel", [{"Ptr": "b13"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p212"}, {"Ptr": "p213"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b13"}, {"Ptr": "p214"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 128}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b4"}, {"Ptr": "output"}, {"Ptr": "p215"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b20"}, {"Ptr": "b4"}, {"Ptr": "p216"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b9"}, {"Ptr": "b20"}, {"Ptr": "p217"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b9"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b9"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b20"}, {"Ptr": "b9"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p218"}, {"Ptr": "p219"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b13"}, {"Ptr": "b20"}, {"Ptr": "p220"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_kernel", [{"Ptr": "b20"}, {"Ptr": "b13"}, {"Ptr": "p221"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": 
"b20"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b5"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b235"}, {"Ptr": "b20"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b13"}, {"Ptr": "b20"}, {"Ptr": "b5"}, {"Ptr": "b6"}, {"Ptr": "p222"}, {"Ptr": "p223"}], { + "grid_dim_x": {"Absolute": 96}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b4"}, {"Ptr": "b13"}, {"Ptr": "p224"}], { + "grid_dim_x": {"Absolute": 30522}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_1_kernel", [{"Ptr": "output"}, {"Ptr": "b4"}, {"Ptr": "p225"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_double_matmul-new.json b/machine_interface/tests/data/cuda/test_gpu_double_matmul-new.json new file mode 100644 index 00000000..891cafe4 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_double_matmul-new.json @@ -0,0 +1,33 @@ +{ + "modules": [{"module_name": "kernels.cubin", "path": "kernels.cubin"}], + "kernels": [ + {"module_name": "kernels.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_1_kernel"}, + {"module_name": "kernels.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"} + ], + "blueprint": { + "inputs": ["input"], + "weights": ["p0", "p1"], + "buffers": {"output": {"Absolute": 12}, "b2": {"Absolute": 20}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "input"}, {"Ptr": "p0"}], { + "grid_dim_x": {"Absolute": 5}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_double_matmul.json b/machine_interface/tests/data/cuda/test_gpu_double_matmul.json new file mode 100644 index 00000000..5daf035c --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_double_matmul.json @@ -0,0 +1,32 @@ +{ + "modules": [{"module_name": "double_matmul.cubin", "path": "cuda/double_matmul.cubin"}], + "kernels": [ + {"module_name": "double_matmul.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_1_kernel"}, + {"module_name": "double_matmul.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1"], + "buffers": {"output": {"Absolute": 12}, "b2": {"Absolute": 20}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "input"}, {"Ptr": "p0"}], { + "grid_dim_x": {"Absolute": 5}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_llama-full.json b/machine_interface/tests/data/cuda/test_gpu_llama-full.json new file mode 100644 index 00000000..ae344d5d --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_llama-full.json @@ -0,0 +1,3981 @@ +{ + "modules": [{"module_name": "llama.cubin", "path": "cuda/llama.cubin"}, {"module_name": "llm_inference.cubin", "path": "cuda/llm_inference.cubin"}], + 
"kernels": [ + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_less_add_where_take_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_multiply_sum_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_1_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_1_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_2_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_3_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_4_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_1"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_2"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_3"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_1_kernel"}, + {"module_name": "llama.cubin", "kernel_name": 
"tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_reshape_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_where_divide_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel"}, + {"module_name": "llm_inference.cubin", "kernel_name": "initialize_index_from_token_ids"}, + {"module_name": "llm_inference.cubin", "kernel_name": "indexed_argmax_logits_and_append"} + ], + "blueprint": { + "inputs": ["token_ids", "times", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", 
"p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", "p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209", "p210", "p211", "p212", "p213", "p214", "p215", "p216", "p217", "p218", "p219", "p220", "p221", "p222", "p223", "p224", "p225", "p226"], + "buffers": {"index": {"Absolute": 8}, "output": {"Absolute": 65667072}, "b2": {"Absolute": 1048576}, "b3": {"Absolute": 512}, "b5": {"Absolute": 1048576}, "b8": {"Absolute": 4194304}, "b12": {"Absolute": 4194304}, "b234": {"Absolute": 16384}, "b235": {"Absolute": 2097152}}, + "outputs": ["token_ids"], + "control_flow": [ + {"ExecKernel": ["initialize_index_from_token_ids", [{"Ptr": "token_ids"}, {"Ptr": "index"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"Repeat": [{"FromInput": {"bufname": "times", "idx": 0}}, [ + {"ExecKernel": ["tvmgen_default_fused_less_add_where_take_kernel", [{"Ptr": "b2"}, {"Ptr": "token_ids"}, {"Ptr": "p0"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", 
[{"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}, {"Ptr": "p6"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}, {"Ptr": "p8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p10"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": 
"p12"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}, {"Ptr": "p13"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + 
"block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p15"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p17"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p20"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p22"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + 
"grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p26"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p27"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p28"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": 
{"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p30"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p31"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p34"}, {"Ptr": "p35"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p36"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": 
"b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p38"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p40"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p41"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p42"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p44"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p45"}, {"Ptr": "p46"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": 
"b2"}, {"Ptr": "p47"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p48"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p50"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p52"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p53"}], { + "grid_dim_x": {"Absolute": 
256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p54"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p56"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p58"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p59"}, {"Ptr": "p60"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p62"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p64"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", 
[{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p66"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p67"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p68"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + 
"grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p71"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p72"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p73"}, {"Ptr": "p74"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p76"}, {"Ptr": "p77"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p78"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p80"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p83"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p84"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p85"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p86"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p87"}, {"Ptr": "p88"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p90"}, {"Ptr": "p91"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p92"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p94"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p95"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p96"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p97"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p98"}], { + "grid_dim_x": 
{"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p101"}, {"Ptr": "p102"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p103"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p104"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": 
{"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p106"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, 
{"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p108"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p109"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p110"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p112"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p113"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p114"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p115"}, {"Ptr": "p116"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p118"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + 
"grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p120"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p121"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p122"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 
256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p123"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p124"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p126"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p127"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p128"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p129"}, {"Ptr": "p130"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p132"}, {"Ptr": "p133"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p134"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p135"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p136"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p138"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p139"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": 
"output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p140"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p141"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p142"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p143"}, {"Ptr": "p144"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p145"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p146"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p148"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", 
[{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p150"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p151"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p152"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p153"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p154"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p156"}], { + 
"grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p157"}, {"Ptr": "p158"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p159"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p160"}, {"Ptr": "p161"}], { 
+ "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p162"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p163"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": 
"p164"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p166"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p167"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p168"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p169"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p170"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p171"}, {"Ptr": "p172"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p173"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p174"}, {"Ptr": "p175"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p176"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + 
"grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p178"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p179"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p180"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", 
[{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p181"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p182"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p184"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p185"}, {"Ptr": "p186"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p187"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p188"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p190"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p191"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": 
{"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p192"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p193"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p194"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p196"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p197"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p198"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p199"}, {"Ptr": "p200"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p201"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p202"}, {"Ptr": "p203"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p204"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + 
"grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p205"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p206"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p207"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": 
"b2"}, {"Ptr": "p208"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p209"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p210"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p211"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p212"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p213"}, {"Ptr": "p214"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p215"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p216"}, {"Ptr": "p217"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p218"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p219"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p220"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p221"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p222"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p223"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p224"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": 
"output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p225"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_4_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p226"}], { + "grid_dim_x": {"Absolute": 128256}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["indexed_argmax_logits_and_append", [{"Ptr": "output"}, {"Ptr": "index"}, {"Ptr": "token_ids"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ]]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_llama.json b/machine_interface/tests/data/cuda/test_gpu_llama.json new file mode 100644 index 00000000..4a5d74af --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_llama.json @@ -0,0 +1,3959 @@ +{ + "modules": [{"module_name": "llama.cubin", "path": "cuda/llama.cubin"}], + "kernels": [ + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_less_add_where_take_kernel"}, + {"module_name": "llama.cubin", "kernel_name": 
"tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_multiply_sum_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_1_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_1_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_2_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_3_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_4_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_1"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_2"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_3"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_1_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_kernel"}, + {"module_name": "llama.cubin", 
"kernel_name": "tvmgen_default_fused_reshape_transpose_reshape_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_reshape_where_divide_kernel"}, + {"module_name": "llama.cubin", "kernel_name": "tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel"} + ], + "blueprint": { + "inputs": ["token_ids", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", "p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209", "p210", "p211", "p212", "p213", "p214", "p215", "p216", "p217", "p218", "p219", 
"p220", "p221", "p222", "p223", "p224", "p225", "p226"], + "buffers": {"output": {"Absolute": 65667072}, "b2": {"Absolute": 1048576}, "b3": {"Absolute": 512}, "b5": {"Absolute": 1048576}, "b8": {"Absolute": 4194304}, "b12": {"Absolute": 4194304}, "b234": {"Absolute": 16384}, "b235": {"Absolute": 2097152}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_less_add_where_take_kernel", [{"Ptr": "b2"}, {"Ptr": "token_ids"}, {"Ptr": "p0"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}, {"Ptr": "p6"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 2}, + 
"grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}, {"Ptr": "p8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p10"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}, {"Ptr": "b2"}], { + "grid_dim_x": 
{"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}, {"Ptr": "p13"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p15"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p17"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p20"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p22"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p26"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p27"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + 
"grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p28"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p30"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p31"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p34"}, {"Ptr": "p35"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p36"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + 
"grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p38"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p40"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p41"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p42"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p44"}], { + "grid_dim_x": 
{"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p45"}, {"Ptr": "p46"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p47"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p48"}, {"Ptr": "p49"}], { + "grid_dim_x": 
{"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p50"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p52"}], { + 
"grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p53"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p54"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p56"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p58"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p59"}, {"Ptr": "p60"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p62"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p64"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + 
"grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p66"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p67"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p68"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", 
[{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p71"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p72"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p73"}, {"Ptr": "p74"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p76"}, {"Ptr": "p77"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p78"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p80"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p83"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p84"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": 
"b3"}, {"Ptr": "output"}, {"Ptr": "p85"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p86"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p87"}, {"Ptr": "p88"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p90"}, {"Ptr": "p91"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p92"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", 
[{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p94"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p95"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p96"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p97"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p98"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p101"}, {"Ptr": "p102"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p103"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", 
[{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p104"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p106"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p108"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p109"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p110"}], { + 
"grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p112"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": 
{"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p113"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p114"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p115"}, {"Ptr": "p116"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p118"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p120"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p121"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + 
"grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p122"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p123"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p124"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p126"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": 
{"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p127"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p128"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p129"}, {"Ptr": "p130"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p132"}, {"Ptr": "p133"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p134"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p135"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", 
[{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p136"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p138"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p139"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p140"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p141"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p142"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p143"}, {"Ptr": "p144"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", 
[{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p145"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p146"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p148"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p150"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p151"}], { + 
"grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p152"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p153"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p154"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p156"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p157"}, {"Ptr": "p158"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p159"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p160"}, {"Ptr": "p161"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p162"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p163"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p164"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p166"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p167"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p168"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": 
"b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p169"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p170"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p171"}, {"Ptr": "p172"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p173"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p174"}, {"Ptr": "p175"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p176"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p178"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p179"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p180"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p181"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p182"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p184"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p185"}, {"Ptr": "p186"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p187"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p188"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p190"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p191"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p192"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p193"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p194"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": 
"b12"}, {"Ptr": "p196"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p197"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p198"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "p199"}, {"Ptr": "p200"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b5"}, {"Ptr": "p201"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p202"}, {"Ptr": "p203"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": 
{"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "b12"}, {"Ptr": "b8"}, {"Ptr": "p204"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": 
"b2"}, {"Ptr": "b5"}, {"Ptr": "p205"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p206"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b5"}, {"Ptr": "p207"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "p208"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b2"}, {"Ptr": "p209"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "output"}, {"Ptr": "b12"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b2"}, {"Ptr": "output"}, {"Ptr": "p210"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b12"}, {"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p211"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p212"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 
256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "p213"}, {"Ptr": "p214"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "output"}, {"Ptr": "b2"}, {"Ptr": "p215"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b8"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_98b283a43dfeffd8__kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p216"}, {"Ptr": "p217"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "output"}], { + 
"grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_where_divide_kernel", [{"Ptr": "output"}, {"Ptr": "b8"}, {"Ptr": "p218"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b235"}, {"Ptr": "b234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b235"}, {"Ptr": "b234"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p219"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_transpose_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b5"}, {"Ptr": "b8"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p220"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": 
"b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "b2"}, {"Ptr": "p221"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p222"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b8"}, {"Ptr": "b5"}, {"Ptr": "p223"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "output"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b5"}, {"Ptr": "b12"}, {"Ptr": "p224"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b3"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b5"}, {"Ptr": "b3"}, {"Ptr": "output"}, {"Ptr": "p225"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_4_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}, {"Ptr": "p226"}], { + "grid_dim_x": {"Absolute": 128256}, + "grid_dim_y": {"Absolute": 128}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} 
+ }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_llama_kv-full.json b/machine_interface/tests/data/cuda/test_gpu_llama_kv-full.json new file mode 100644 index 00000000..db314b3e --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_llama_kv-full.json @@ -0,0 +1,5216 @@ +{ + "modules": [{"module_name": "llama_kv.cubin", "path": "cuda/llama_kv.cubin"}, {"module_name": "llm_inference.cubin", "path": "cuda/llm_inference.cubin"}], + "kernels": [ + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_add_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_concatenate_transpose_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_greater_equal_reshape_where_divide_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_less_add_where_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_less_add_where_take_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_multiply_sum_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_1_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_1_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_2_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_3_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": 
"tvmgen_default_fused_nn_dense_4_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_1"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_2"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_3"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_broadcast_to_expand_dims_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_1_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_reshape_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_scatter_nd_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_scatter_nd_kernel_1"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_take_expand_dims_expand_dims_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_take_reshape_transpose_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_take_transpose_reshape_transpose_kernel"}, + {"module_name": 
"llm_inference.cubin", "kernel_name": "initialize_index"}, + {"module_name": "llm_inference.cubin", "kernel_name": "extract_token_from_tokens"}, + {"module_name": "llm_inference.cubin", "kernel_name": "argmax_logits_and_append"}, + {"module_name": "llm_inference.cubin", "kernel_name": "duplicate_cache"} + ], + "blueprint": { + "inputs": ["token_ids", "keys", "values", "cos", "sin", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", "p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209", "p210", "p211", "p212", "p213", "p214", 
"p215", "p216", "p217", "p218", "p219", "p220", "p221", "p222", "p223", "p224", "p225", "p226", "p227", "p228", "p229", "p230", "p231", "p232", "p233", "p234", "p235", "p236", "p237", "p238", "p239", "p240", "p241", "p242", "p243", "p244", "p245", "p246", "p247", "p248", "p249", "p250", "p251", "p252", "p253", "p254", "p255", "p256", "p257", "p258", "p259", "p260", "p261", "p262", "p263", "p264", "p265", "p266", "p267", "p268", "p269", "p270", "p271", "p272", "p273", "p274", "p275", "p276", "p277", "p278", "p279", "p280", "p281", "p282", "p283", "p284", "p285", "p286", "p287", "p288", "p289", "p290", "p291", "p292", "p293", "p294", "p295", "p296", "p297", "p298", "p299", "p300", "p301", "p302", "p303", "p304", "p305", "p306", "p307", "p308", "p309", "p310", "p311", "p312", "p313", "p314", "p315", "p316", "p317", "p318", "p319", "p320", "p321", "p322"], + "buffers": {"token_id": {"Absolute": 8}, "index": {"Absolute": 8}, "output": {"Absolute": 1048576}, "tmp_keys": {"Absolute": 16777216}, "tmp_values": {"Absolute": 16777216}, "b7": {"Absolute": 8}, "b24": {"Absolute": 32768}, "b25": {"Absolute": 8}, "b27": {"Absolute": 32768}, "b29": {"Absolute": 32768}, "b30": {"Absolute": 16384}, "b31": {"Absolute": 256}, "b32": {"Absolute": 256}, "b39": {"Absolute": 16384}, "b40": {"Absolute": 8192}, "b62": {"Absolute": 16777216}, "b343": {"Absolute": 4096}, "b344": {"Absolute": 16384}}, + "outputs": ["token_ids"], + "control_flow": [ + {"ExecKernel": ["initialize_index", [{"Ptr": "index"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"Repeat": [{"Absolute": 128}, [ + {"ExecKernel": ["extract_token_from_tokens", [{"Ptr": "token_ids"}, {"Ptr": "index"}, {"Ptr": "token_id"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_add_kernel", [{"Ptr": "b7"}, {"Ptr": "index"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_less_add_where_take_kernel", [{"Ptr": "b24"}, {"Ptr": "token_id"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b30"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_less_add_where_kernel", [{"Ptr": "b25"}, {"Ptr": "index"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_expand_dims_expand_dims_kernel", [{"Ptr": "b31"}, {"Ptr": "cos"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_expand_dims_expand_dims_kernel", [{"Ptr": "b32"}, {"Ptr": "sin"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b29"}, {"Ptr": "b30"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_broadcast_to_expand_dims_kernel", [{"Ptr": "b30"}, 
{"Ptr": "index"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p19"}, {"Ptr": "p20"}, {"Ptr": "p21"}, {"Ptr": "b30"}, {"Ptr": "p22"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "keys"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 
4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b39"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b29"}, {"Ptr": "p15"}, {"Ptr": "b7"}, {"Ptr": "b39"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p26"}, {"Ptr": "p27"}, {"Ptr": "p28"}, {"Ptr": "b30"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p30"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "values"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}], { + "grid_dim_x": 
{"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "p34"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "p35"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", 
[{"Ptr": "b39"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p36"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p38"}, {"Ptr": "p39"}, {"Ptr": "p40"}, {"Ptr": "b30"}, {"Ptr": "p41"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b40"}, {"Ptr": 
"b39"}, {"Ptr": "p42"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b27"}, {"Ptr": "p14"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p44"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p45"}, {"Ptr": "p46"}, {"Ptr": "p47"}, {"Ptr": "b30"}, {"Ptr": "p48"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b39"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p50"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p52"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p53"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p54"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p56"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, 
{"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p57"}, {"Ptr": "p58"}, {"Ptr": "p59"}, {"Ptr": "b30"}, {"Ptr": "p60"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p62"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p13"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p64"}, {"Ptr": "p65"}, {"Ptr": "p66"}, {"Ptr": "b30"}, {"Ptr": "p67"}], { + 
"grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p68"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p62"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", 
[{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p71"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p72"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p73"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p74"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p76"}, {"Ptr": "p77"}, {"Ptr": "p78"}, {"Ptr": "b30"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p80"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p12"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p83"}, {"Ptr": "p84"}, {"Ptr": "p85"}, {"Ptr": "b30"}, {"Ptr": "p86"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + 
"grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p88"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p90"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", 
[{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p91"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p92"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p94"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p95"}, {"Ptr": "p96"}, {"Ptr": "p97"}, {"Ptr": "b30"}, {"Ptr": "p98"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b29"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p11"}, {"Ptr": "b7"}, {"Ptr": "b27"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, 
{"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p102"}, {"Ptr": "p103"}, {"Ptr": "p104"}, {"Ptr": "b30"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p106"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p108"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p109"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p110"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": 
"b40"}, {"Ptr": "b24"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p112"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p113"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p114"}, {"Ptr": "p115"}, {"Ptr": "p116"}, {"Ptr": "b30"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p118"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p10"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p120"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p121"}, {"Ptr": "p122"}, {"Ptr": "p123"}, {"Ptr": "b30"}, {"Ptr": "p124"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p126"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p127"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p128"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p129"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p130"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": 
"b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p132"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p133"}, {"Ptr": "p134"}, {"Ptr": "p135"}, {"Ptr": "b30"}, 
{"Ptr": "p136"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p138"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p9"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p139"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p140"}, {"Ptr": "p141"}, {"Ptr": "p142"}, {"Ptr": "b30"}, {"Ptr": "p143"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p144"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p138"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p145"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p146"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p148"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p150"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, 
{"Ptr": "b40"}, {"Ptr": "p151"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p152"}, {"Ptr": "p153"}, {"Ptr": "p154"}, {"Ptr": "b30"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p156"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b29"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p157"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p8"}, {"Ptr": "b7"}, {"Ptr": "b27"}, {"Ptr": "p158"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p159"}, {"Ptr": "p160"}, {"Ptr": "p161"}, {"Ptr": "b30"}, {"Ptr": "p162"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p163"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p157"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p164"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p166"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p167"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p168"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p169"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p170"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p171"}, {"Ptr": "p172"}, {"Ptr": "p173"}, {"Ptr": "b30"}, {"Ptr": "p174"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p175"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, 
+ "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p176"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p7"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p178"}, {"Ptr": "p179"}, {"Ptr": "p180"}, {"Ptr": "b30"}, {"Ptr": "p181"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p182"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p176"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p184"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, 
{"Ptr": "p185"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p186"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p187"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + 
"block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p188"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p190"}, {"Ptr": "p191"}, {"Ptr": "p192"}, {"Ptr": "b30"}, {"Ptr": "p193"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p194"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p6"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p196"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": 
"b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p197"}, {"Ptr": "p198"}, {"Ptr": "p199"}, {"Ptr": "b30"}, {"Ptr": "p200"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p201"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p202"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p203"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p204"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p205"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": 
"b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p206"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p207"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p208"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p209"}, {"Ptr": "p210"}, {"Ptr": "p211"}, {"Ptr": "b30"}, {"Ptr": "p212"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p213"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b29"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p214"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p5"}, {"Ptr": "b7"}, {"Ptr": "b27"}, {"Ptr": "p215"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p216"}, {"Ptr": 
"p217"}, {"Ptr": "p218"}, {"Ptr": "b30"}, {"Ptr": "p219"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p220"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p214"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p221"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p222"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p223"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p224"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p225"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": 
"b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p226"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p227"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p228"}, {"Ptr": "p229"}, {"Ptr": "p230"}, {"Ptr": "b30"}, {"Ptr": "p231"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p232"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p233"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p4"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p235"}, {"Ptr": "p236"}, {"Ptr": "p237"}, {"Ptr": "b30"}, {"Ptr": "p238"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p239"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p233"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p240"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p241"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p242"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p243"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p244"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": 
"b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p245"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p246"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p247"}, {"Ptr": "p248"}, {"Ptr": "p249"}, {"Ptr": "b30"}, {"Ptr": "p250"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": 
"b40"}, {"Ptr": "p251"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p252"}], { + "grid_dim_x": {"Absolute": 256}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p3"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p253"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p254"}, {"Ptr": "p255"}, {"Ptr": "p256"}, {"Ptr": "b30"}, {"Ptr": "p257"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p258"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p252"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p259"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p260"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p261"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p262"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p263"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p264"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p265"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": 
"b27"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p266"}, {"Ptr": "p267"}, {"Ptr": "p268"}, {"Ptr": "b30"}, {"Ptr": "p269"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p270"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b29"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p271"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", 
[{"Ptr": "b39"}, {"Ptr": "p2"}, {"Ptr": "b7"}, {"Ptr": "b27"}, {"Ptr": "p272"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p273"}, {"Ptr": "p274"}, {"Ptr": "p275"}, {"Ptr": "b30"}, {"Ptr": "p276"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p277"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p271"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p278"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, 
{"Ptr": "p279"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p280"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p281"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p282"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p283"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p284"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p285"}, {"Ptr": "p286"}, {"Ptr": "p287"}, {"Ptr": "b30"}, {"Ptr": "p288"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p289"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} 
+ }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p290"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p1"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p291"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, 
{"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p292"}, {"Ptr": "p293"}, {"Ptr": "p294"}, {"Ptr": "b30"}, {"Ptr": "p295"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p296"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p290"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p297"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p298"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p299"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p300"}], { + "grid_dim_x": {"Absolute": 8192}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p301"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p302"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p303"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p304"}, {"Ptr": "p305"}, {"Ptr": "p306"}, {"Ptr": "b30"}, {"Ptr": "p307"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p308"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p309"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p0"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p310"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p311"}, {"Ptr": "p312"}, {"Ptr": "p313"}, {"Ptr": "b30"}, {"Ptr": "p314"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p315"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p309"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b30"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b30"}, {"Ptr": "p316"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p317"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p318"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p319"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p320"}], { + "grid_dim_x": 
{"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b30"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b7"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b7"}, {"Ptr": "b30"}, {"Ptr": "p321"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_4_kernel", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "p322"}], { + "grid_dim_x": {"Absolute": 128256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["argmax_logits_and_append", [{"Ptr": "output"}, {"Ptr": "index"}, {"Ptr": "token_ids"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["duplicate_cache", [{"Ptr": "keys"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["duplicate_cache", [{"Ptr": "values"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ]]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_llama_kv.json b/machine_interface/tests/data/cuda/test_gpu_llama_kv.json new file mode 100644 index 00000000..95557cbb --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_llama_kv.json @@ -0,0 +1,5165 @@ +{ + "modules": [{"module_name": "llama_kv.cubin", "path": "cuda/llama_kv.cubin"}], + "kernels": [ + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_add_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_concatenate_transpose_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_greater_equal_reshape_where_divide_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_less_add_where_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_less_add_where_take_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": 
"tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_multiply_sum_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_1_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_1_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_2_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_3_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_4_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_1"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_2"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_3"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_broadcast_to_expand_dims_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_1_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_kernel"}, + {"module_name": 
"llama_kv.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_reshape_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_scatter_nd_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_scatter_nd_kernel_1"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_take_expand_dims_expand_dims_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_take_reshape_transpose_kernel"}, + {"module_name": "llama_kv.cubin", "kernel_name": "tvmgen_default_fused_take_transpose_reshape_transpose_kernel"} + ], + "blueprint": { + "inputs": ["token_id", "index", "keys", "values", "cos", "sin", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", 
"p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", "p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209", "p210", "p211", "p212", "p213", "p214", "p215", "p216", "p217", "p218", "p219", "p220", "p221", "p222", "p223", "p224", "p225", "p226", "p227", "p228", "p229", "p230", "p231", "p232", "p233", "p234", "p235", "p236", "p237", "p238", "p239", "p240", "p241", "p242", "p243", "p244", "p245", "p246", "p247", "p248", "p249", "p250", "p251", "p252", "p253", "p254", "p255", "p256", "p257", "p258", "p259", "p260", "p261", "p262", "p263", "p264", "p265", "p266", "p267", "p268", "p269", "p270", "p271", "p272", "p273", "p274", "p275", "p276", "p277", "p278", "p279", "p280", "p281", "p282", "p283", "p284", "p285", "p286", "p287", "p288", "p289", "p290", "p291", "p292", "p293", "p294", "p295", "p296", "p297", "p298", "p299", "p300", "p301", "p302", "p303", "p304", "p305", "p306", "p307", "p308", "p309", "p310", "p311", "p312", "p313", "p314", "p315", "p316", "p317", "p318", "p319", "p320", "p321", "p322"], + "buffers": {"output": {"Absolute": 1048576}, "tmp_keys": {"Absolute": 16777216}, "tmp_values": {"Absolute": 16777216}, "b7": {"Absolute": 8}, "b24": {"Absolute": 32768}, "b25": {"Absolute": 8}, "b27": {"Absolute": 32768}, "b29": {"Absolute": 32768}, "b30": {"Absolute": 16384}, "b31": {"Absolute": 256}, "b32": {"Absolute": 256}, "b39": {"Absolute": 16384}, "b40": {"Absolute": 8192}, "b62": {"Absolute": 16777216}, "b343": {"Absolute": 4096}, "b344": {"Absolute": 16384}}, + "outputs": ["output", "tmp_keys", "tmp_values"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_add_kernel", [{"Ptr": "b7"}, {"Ptr": "index"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_less_add_where_take_kernel", [{"Ptr": "b24"}, {"Ptr": "token_id"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b30"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_less_add_where_kernel", [{"Ptr": "b25"}, {"Ptr": "index"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_expand_dims_expand_dims_kernel", [{"Ptr": "b31"}, {"Ptr": "cos"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_expand_dims_expand_dims_kernel", [{"Ptr": "b32"}, {"Ptr": "sin"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b29"}, {"Ptr": "b30"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_broadcast_to_expand_dims_kernel", [{"Ptr": "b30"}, {"Ptr": "index"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p19"}, {"Ptr": "p20"}, {"Ptr": "p21"}, {"Ptr": "b30"}, {"Ptr": "p22"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "keys"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", 
[{"Ptr": "output"}, {"Ptr": "b39"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b29"}, {"Ptr": "p15"}, {"Ptr": "b7"}, {"Ptr": "b39"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p26"}, {"Ptr": "p27"}, {"Ptr": "p28"}, {"Ptr": "b30"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p30"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "values"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", 
[{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "p34"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "p35"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p36"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p38"}, {"Ptr": "p39"}, {"Ptr": "p40"}, {"Ptr": "b30"}, {"Ptr": "p41"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p42"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, 
+ {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, 
{"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b27"}, {"Ptr": "p14"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p44"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p45"}, {"Ptr": "p46"}, {"Ptr": "p47"}, {"Ptr": "b30"}, {"Ptr": "p48"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b39"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, 
+ "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p50"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p52"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p53"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p54"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p56"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p57"}, {"Ptr": "p58"}, {"Ptr": "p59"}, {"Ptr": "b30"}, {"Ptr": "p60"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p62"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p13"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p64"}, {"Ptr": "p65"}, {"Ptr": "p66"}, {"Ptr": "b30"}, {"Ptr": "p67"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p68"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p62"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": 
"p71"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p72"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p73"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": 
{"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p74"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p76"}, {"Ptr": "p77"}, {"Ptr": "p78"}, {"Ptr": "b30"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p80"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p12"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": 
"b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p83"}, {"Ptr": "p84"}, {"Ptr": "p85"}, {"Ptr": "b30"}, {"Ptr": "p86"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p88"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p90"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p91"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], 
{ + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p92"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p94"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p95"}, {"Ptr": "p96"}, {"Ptr": "p97"}, {"Ptr": "b30"}, {"Ptr": "p98"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, 
+ "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b29"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p11"}, {"Ptr": "b7"}, {"Ptr": "b27"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p102"}, {"Ptr": "p103"}, {"Ptr": "p104"}, {"Ptr": "b30"}, {"Ptr": "p105"}], { 
+ "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p106"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p108"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p109"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p110"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + 
"grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p112"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p113"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p114"}, {"Ptr": "p115"}, {"Ptr": "p116"}, {"Ptr": "b30"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p118"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p10"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p120"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p121"}, {"Ptr": "p122"}, {"Ptr": "p123"}, {"Ptr": "b30"}, {"Ptr": "p124"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p126"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p127"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p128"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p129"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p130"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": 
"b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p132"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p133"}, {"Ptr": "p134"}, {"Ptr": "p135"}, {"Ptr": "b30"}, {"Ptr": "p136"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": 
"b40"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p138"}], { + "grid_dim_x": {"Absolute": 256}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p9"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p139"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p140"}, {"Ptr": "p141"}, {"Ptr": "p142"}, {"Ptr": "b30"}, {"Ptr": "p143"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p144"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p138"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p145"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p146"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p148"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p150"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p151"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": 
"b27"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p152"}, {"Ptr": "p153"}, {"Ptr": "p154"}, {"Ptr": "b30"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p156"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b29"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p157"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", 
[{"Ptr": "b39"}, {"Ptr": "p8"}, {"Ptr": "b7"}, {"Ptr": "b27"}, {"Ptr": "p158"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p159"}, {"Ptr": "p160"}, {"Ptr": "p161"}, {"Ptr": "b30"}, {"Ptr": "p162"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p163"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p157"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p164"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, 
{"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p166"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p167"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p168"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p169"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p170"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p171"}, {"Ptr": "p172"}, {"Ptr": "p173"}, {"Ptr": "b30"}, {"Ptr": "p174"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p175"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} 
+ }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p176"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p7"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, 
{"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p178"}, {"Ptr": "p179"}, {"Ptr": "p180"}, {"Ptr": "b30"}, {"Ptr": "p181"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p182"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p176"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p184"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p185"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p186"}], { + "grid_dim_x": {"Absolute": 8192}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p187"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p188"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p190"}, {"Ptr": "p191"}, {"Ptr": "p192"}, {"Ptr": "b30"}, {"Ptr": "p193"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p194"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p6"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p196"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p197"}, {"Ptr": "p198"}, {"Ptr": "p199"}, {"Ptr": "b30"}, {"Ptr": "p200"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p201"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p202"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p203"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p204"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p205"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p206"}], { + "grid_dim_x": 
{"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p207"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p208"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p209"}, {"Ptr": "p210"}, {"Ptr": "p211"}, {"Ptr": "b30"}, {"Ptr": "p212"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p213"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b29"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p214"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p5"}, {"Ptr": "b7"}, {"Ptr": "b27"}, {"Ptr": "p215"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p216"}, {"Ptr": "p217"}, {"Ptr": "p218"}, {"Ptr": "b30"}, {"Ptr": "p219"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": 
"p220"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p214"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + 
"block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p221"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p222"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p223"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p224"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p225"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p226"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p227"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p228"}, {"Ptr": "p229"}, {"Ptr": "p230"}, {"Ptr": "b30"}, {"Ptr": "p231"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p232"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p233"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p4"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p234"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, 
+ {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p235"}, {"Ptr": "p236"}, {"Ptr": "p237"}, {"Ptr": "b30"}, {"Ptr": "p238"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p239"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], 
{ + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p233"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p240"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p241"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p242"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p243"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p244"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p245"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p246"}], { + "grid_dim_x": 
{"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p247"}, {"Ptr": "p248"}, {"Ptr": "p249"}, {"Ptr": "b30"}, {"Ptr": "p250"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p251"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p252"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, 
+ "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p3"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p253"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p254"}, {"Ptr": "p255"}, {"Ptr": "p256"}, {"Ptr": "b30"}, {"Ptr": "p257"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p258"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": 
"output"}, {"Ptr": "tmp_values"}, {"Ptr": "p252"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p259"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + 
"block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p260"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p261"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p262"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p263"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b24"}, {"Ptr": "p264"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p265"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, 
{"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p266"}, {"Ptr": "p267"}, {"Ptr": "p268"}, {"Ptr": "b30"}, {"Ptr": "p269"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p270"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": 
"b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b29"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p271"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b27"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p2"}, {"Ptr": "b7"}, {"Ptr": "b27"}, {"Ptr": "p272"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p273"}, {"Ptr": "p274"}, {"Ptr": "p275"}, {"Ptr": "b30"}, {"Ptr": "p276"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p277"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p271"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, 
{"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p278"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p279"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p280"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p281"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "p282"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b27"}, {"Ptr": "p283"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p284"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p285"}, {"Ptr": "p286"}, {"Ptr": "p287"}, {"Ptr": "b30"}, {"Ptr": "p288"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p289"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b24"}, {"Ptr": "b29"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b24"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, 
{"Ptr": "tmp_values"}, {"Ptr": "p290"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b29"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p1"}, {"Ptr": "b7"}, {"Ptr": "b29"}, {"Ptr": "p291"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p292"}, {"Ptr": "p293"}, {"Ptr": "p294"}, {"Ptr": "b30"}, {"Ptr": "p295"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p296"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_keys"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "b62"}, {"Ptr": "p290"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b24"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}, {"Ptr": "p297"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": 
"b27"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p298"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "p299"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p300"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b27"}, {"Ptr": "b29"}, {"Ptr": "b24"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "p301"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b29"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b29"}, {"Ptr": "p302"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p303"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_reshape_transpose_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_res_1bda687858ca57c0__kernel", [{"Ptr": "b39"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p304"}, {"Ptr": "p305"}, {"Ptr": "p306"}, {"Ptr": "b30"}, {"Ptr": "p307"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p308"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_1_kernel", [{"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_multiply_strided_slice_negative_strided_slice_concatenate_multiply_add_exp_c89bec4d53200e45__kernel", [{"Ptr": "b27"}, {"Ptr": "b24"}, {"Ptr": "b31"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "tmp_values"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b27"}, {"Ptr": "tmp_keys"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_transpose_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_keys"}, {"Ptr": "p309"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b24"}, {"Ptr": "b39"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_greater_equal_reshape_where_divide_kernel", [{"Ptr": "b39"}, {"Ptr": "p0"}, {"Ptr": "b7"}, {"Ptr": "b24"}, {"Ptr": "p310"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b344"}, {"Ptr": "b343"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b344"}, {"Ptr": "b343"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_concatenate_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "p311"}, {"Ptr": "p312"}, {"Ptr": "p313"}, {"Ptr": "b30"}, {"Ptr": "p314"}], { + 
"grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "p315"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_expand_dims_tile_reshape_strided_slice_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b62"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 4194304}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "tmp_values"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_kernel", [{"Ptr": "output"}, {"Ptr": "tmp_values"}, {"Ptr": "p309"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b30"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b40"}, {"Ptr": "b30"}, {"Ptr": "p316"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b40"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b25"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b25"}, {"Ptr": "b39"}, {"Ptr": "p317"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b24"}, {"Ptr": "b40"}, {"Ptr": "p318"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b27"}, {"Ptr": "b40"}, {"Ptr": "p319"}], { + "grid_dim_x": {"Absolute": 8192}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_sigmoid_multiply_reshape_multiply_reshape_kernel", [{"Ptr": "b29"}, {"Ptr": "b24"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 8}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_3_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p320"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_kernel", [{"Ptr": "b30"}, {"Ptr": "b40"}, {"Ptr": "b39"}], { + 
"grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_multiply_sum_kernel", [{"Ptr": "b7"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sqrt_multiply_add_divide_multiply_reshape_kernel", [{"Ptr": "b40"}, {"Ptr": "b7"}, {"Ptr": "b30"}, {"Ptr": "p321"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_4_kernel", [{"Ptr": "output"}, {"Ptr": "b40"}, {"Ptr": "p322"}], { + "grid_dim_x": {"Absolute": 128256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_llm.json b/machine_interface/tests/data/cuda/test_gpu_llm.json new file mode 100644 index 00000000..641f50b9 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_llm.json @@ -0,0 +1,3552 @@ +{ + "modules": [{"module_name": "llm.cubin", "path": "cuda/llm.cubin"}], + "kernels": [ + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_equal_logical_not_where_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_scatter_nd_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_scatter_nd_kernel_1"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__100_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__101_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__102_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__103_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__104_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__105_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__106_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__107_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__108_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__109_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__10_kernel"}, + {"module_name": "llm.cubin", 
"kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__110_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__111_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__112_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__113_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__114_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__115_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__116_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__117_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__118_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__119_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__11_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__120_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__121_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__122_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__123_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__124_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__125_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__12_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__13_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__14_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__15_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__16_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__17_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__18_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__19_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__1_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__20_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__21_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__22_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__23_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__24_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__25_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__26_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__27_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__28_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__29_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__2_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__30_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__31_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__32_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__33_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__34_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__35_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__36_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__37_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__38_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__39_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__3_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__40_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__41_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__42_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__43_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__44_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__45_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__46_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__47_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__48_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__49_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__4_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__50_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__51_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__52_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__53_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__54_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__55_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__56_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__57_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__58_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__59_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__5_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__60_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__61_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__62_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__63_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__64_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__65_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__66_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__67_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__68_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__69_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__6_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__70_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__71_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__72_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__73_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__74_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__75_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__76_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__77_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__78_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__79_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__7_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__80_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__81_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__82_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__83_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__84_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__85_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__86_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__87_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__88_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__89_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__8_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__90_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__91_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__92_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__93_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__94_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__95_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__96_kernel"}, + {"module_name": "llm.cubin", "kernel_name": 
"tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__97_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__98_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__99_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__9_kernel"}, + {"module_name": "llm.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__kernel"} + ], + "blueprint": { + "inputs": ["token_ids", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", 
"p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", "p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209", "p210", "p211", "p212", "p213", "p214", "p215", "p216", "p217", "p218", "p219", "p220", "p221", "p222", "p223", "p224", "p225", "p226", "p227", "p228", "p229", "p230", "p231", "p232", "p233", "p234", "p235", "p236", "p237", "p238", "p239", "p240", "p241", "p242", "p243", "p244", "p245", "p246", "p247", "p248", "p249", "p250", "p251"], + "buffers": {"output": {"Absolute": 1024}, "b4": {"Absolute": 8}, "b5": {"Absolute": 1024}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_equal_logical_not_where_kernel", [{"Ptr": "output"}, {"Ptr": "token_ids"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p0"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__1_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p2"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__2_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p4"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__3_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p6"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__4_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p8"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__5_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p10"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__6_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p13"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p12"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__7_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p15"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p14"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__8_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p16"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__9_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p18"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__10_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p20"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__11_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p22"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__12_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p24"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__13_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p26"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__14_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p28"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__15_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p30"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__16_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p32"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__17_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p35"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p34"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__18_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p36"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__19_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p38"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__20_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p40"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__21_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p42"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__22_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p45"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p44"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__23_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p46"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__24_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p48"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__25_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p50"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__26_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p52"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__27_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p54"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__28_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p56"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__29_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p58"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__30_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p60"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__31_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p62"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__32_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p64"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__33_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p67"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p66"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__34_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p68"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__35_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p70"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__36_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p73"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p72"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__37_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p74"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__38_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p76"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__39_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p78"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__40_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p80"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__41_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p82"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__42_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p85"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p84"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__43_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p86"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__44_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p88"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__45_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p91"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p90"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__46_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p92"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__47_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p94"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__48_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p97"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p96"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__49_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p98"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__50_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p100"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__51_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p103"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p102"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__52_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p104"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__53_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p106"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__54_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p109"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p108"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__55_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p110"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__56_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p112"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__57_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p115"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p114"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__58_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p116"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__59_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p118"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__60_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p121"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p120"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__61_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p123"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p122"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__62_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p124"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__63_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p127"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p126"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__64_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p129"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p128"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__65_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p130"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__66_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p133"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p132"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__67_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p135"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p134"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__68_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p136"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__69_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p139"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p138"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__70_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p141"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p140"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__71_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p143"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p142"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__72_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p145"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p144"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__73_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p146"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__74_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p148"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__75_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p151"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p150"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__76_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p153"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p152"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__77_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p154"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__78_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p157"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p156"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__79_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p159"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p158"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__80_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p161"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p160"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__81_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p163"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p162"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__82_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p164"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__83_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p167"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p166"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__84_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p169"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p168"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__85_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p171"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p170"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__86_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p173"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p172"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__87_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p175"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p174"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__88_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p176"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__89_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p179"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p178"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__90_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p181"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p180"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__91_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p182"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__92_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p185"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p184"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__93_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p187"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p186"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__94_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p188"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__95_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p191"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p190"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__96_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p193"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p192"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__97_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p194"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__98_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p197"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p196"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__99_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p199"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p198"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__100_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p201"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p200"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__101_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p203"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p202"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__102_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p205"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p204"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__103_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p207"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p206"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__104_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p209"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p208"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__105_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p211"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p210"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__106_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p213"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p212"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__107_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p215"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p214"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__108_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p217"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p216"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__109_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p219"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p218"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__110_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p221"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p220"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__111_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p223"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p222"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__112_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p225"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p224"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__113_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p227"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p226"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__114_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p229"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p228"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__115_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p231"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p230"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__116_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p233"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p232"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__117_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p234"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__118_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p237"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p236"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__119_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p239"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p238"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__120_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p241"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p240"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__121_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p243"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p242"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__122_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p245"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p244"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__123_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p247"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p246"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__124_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "output"}, {"Ptr": "p249"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p248"}, {"Ptr": "b4"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_where_reshape_broadcast_to_broadcast_to_reshap_ea80fd4c09035e54__125_kernel", [{"Ptr": "b4"}, {"Ptr": "token_ids"}, {"Ptr": "b5"}, {"Ptr": "p251"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b5"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p250"}, {"Ptr": "b4"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_lstm.json b/machine_interface/tests/data/cuda/test_gpu_lstm.json new file mode 100644 index 00000000..cf807ad4 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_lstm.json @@ -0,0 +1,11222 @@ +{ + "modules": [{"module_name": "lstm.cubin", "path": "cuda/lstm.cubin"}], + "kernels": [ + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_1_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_2_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_sigmoid_tanh_multiply_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_kernel_1"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_kernel_2"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_kernel_3"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_1"}, + {"module_name": "lstm.cubin", "kernel_name": 
"tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_10"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_11"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_12"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_13"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_14"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_15"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_16"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_17"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_18"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_19"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_2"}, + {"module_name": "lstm.cubin", "kernel_name": 
"tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_20"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_21"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_22"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_23"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_24"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_25"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_26"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_27"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_3"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_4"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_5"}, + {"module_name": "lstm.cubin", "kernel_name": 
"tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_6"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_7"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_8"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_9"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_4e0306112785fa15__kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_10_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_11_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_12_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_13_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_14_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_15_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_16_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_17_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_18_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_19_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_1_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": 
"tvmgen_default_fused_squeeze_concatenate_20_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_21_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_22_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_23_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_24_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_25_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_26_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_27_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_28_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_29_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_2_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_30_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_31_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_32_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_33_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_34_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_35_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_36_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_37_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_38_kernel"}, + 
{"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_39_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_3_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_40_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_41_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_42_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_43_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_44_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_45_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_46_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_47_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_48_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_49_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_4_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_50_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_51_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_52_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_53_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_54_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_55_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": 
"tvmgen_default_fused_squeeze_concatenate_5_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_6_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_7_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_8_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_9_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_squeeze_concatenate_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_1"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_10"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_11"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_12"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_13"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_14"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_15"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_16"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_17"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_18"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_19"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_2"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_20"}, + {"module_name": "lstm.cubin", "kernel_name": 
"tvmgen_default_fused_transpose_split_kernel_21"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_22"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_23"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_24"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_25"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_26"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_27"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_3"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_4"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_5"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_6"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_7"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_8"}, + {"module_name": "lstm.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_9"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", 
"p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", "p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209", "p210", "p211", "p212", "p213", "p214", "p215", "p216", "p217", "p218", "p219", "p220", "p221", "p222", "p223", "p224", "p225", "p226", "p227", "p228", "p229", "p230", "p231", "p232", "p233", "p234", "p235", "p236", "p237", "p238", "p239", "p240", "p241", "p242", "p243", "p244", "p245", "p246", "p247", "p248", "p249", "p250", "p251", "p252", "p253", "p254", "p255", "p256", "p257", "p258", "p259", "p260", "p261", "p262", "p263", "p264", "p265", "p266", "p267", "p268", "p269", "p270", "p271", "p272", "p273", "p274", "p275", "p276", "p277", "p278", "p279", "p280", "p281", "p282", "p283", "p284", "p285", "p286", "p287", "p288", "p289", "p290", "p291"], + "buffers": {"output": {"Absolute": 512}, "b1": {"Absolute": 512}, "b2": {"Absolute": 2048}, "b3": {"Absolute": 512}, "b4": {"Absolute": 512}, "b5": {"Absolute": 512}, "b6": {"Absolute": 512}, "b7": {"Absolute": 512}, "b9": {"Absolute": 512}, "b10": {"Absolute": 512}, "b11": {"Absolute": 512}, "b12": {"Absolute": 512}, "b13": {"Absolute": 512}, "b14": {"Absolute": 512}, "b15": {"Absolute": 512}, "b16": 
{"Absolute": 512}, "b17": {"Absolute": 512}, "b18": {"Absolute": 512}, "b19": {"Absolute": 512}, "b20": {"Absolute": 512}, "b21": {"Absolute": 512}, "b22": {"Absolute": 512}, "b23": {"Absolute": 512}, "b24": {"Absolute": 512}, "b25": {"Absolute": 512}, "b26": {"Absolute": 512}, "b27": {"Absolute": 512}, "b28": {"Absolute": 512}, "b30": {"Absolute": 2048}, "b33": {"Absolute": 2048}, "b34": {"Absolute": 512}, "b35": {"Absolute": 512}, "b36": {"Absolute": 2048}, "b38": {"Absolute": 512}, "b41": {"Absolute": 2048}, "b44": {"Absolute": 2048}, "b47": {"Absolute": 2048}, "b50": {"Absolute": 2048}, "b53": {"Absolute": 2048}, "b56": {"Absolute": 2048}, "b59": {"Absolute": 2048}, "b62": {"Absolute": 2048}, "b65": {"Absolute": 2048}, "b68": {"Absolute": 2048}, "b71": {"Absolute": 2048}, "b74": {"Absolute": 2048}, "b77": {"Absolute": 2048}, "b80": {"Absolute": 2048}, "b83": {"Absolute": 2048}, "b86": {"Absolute": 2048}, "b89": {"Absolute": 2048}, "b92": {"Absolute": 2048}, "b95": {"Absolute": 2048}, "b98": {"Absolute": 2048}, "b101": {"Absolute": 2048}, "b104": {"Absolute": 2048}, "b107": {"Absolute": 2048}, "b110": {"Absolute": 2048}, "b113": {"Absolute": 2048}, "b116": {"Absolute": 2048}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel", [{"Ptr": "b1"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_1", [{"Ptr": "b2"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_transpose_split_kernel_2", [{"Ptr": "b3"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_3", [{"Ptr": "b4"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_4", [{"Ptr": "b5"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_5", [{"Ptr": "b6"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_6", [{"Ptr": "b7"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_7", [{"Ptr": "output"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_8", [{"Ptr": "b9"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_9", [{"Ptr": "b10"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_10", [{"Ptr": "b11"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_11", [{"Ptr": "b12"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_12", [{"Ptr": "b13"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_13", [{"Ptr": "b14"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_14", [{"Ptr": "b15"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_15", [{"Ptr": "b16"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_16", [{"Ptr": "b17"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_17", [{"Ptr": "b18"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_18", [{"Ptr": "b19"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_transpose_split_kernel_19", [{"Ptr": "b20"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_20", [{"Ptr": "b21"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_21", [{"Ptr": "b22"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_22", [{"Ptr": "b23"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_23", [{"Ptr": "b24"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_24", [{"Ptr": "b25"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_25", [{"Ptr": "b26"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_26", [{"Ptr": "b27"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_27", [{"Ptr": "b28"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_kernel", [{"Ptr": "b30"}, {"Ptr": "b1"}, {"Ptr": "p0"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b33"}, {"Ptr": "b30"}, {"Ptr": "p1"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b30"}, {"Ptr": 
"b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b34"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b35"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b36"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b33"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b35"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b38"}, {"Ptr": "b36"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_1_kernel", [{"Ptr": "b30"}, {"Ptr": "b2"}, {"Ptr": "b38"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b30"}, {"Ptr": "p4"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b41"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b33"}, {"Ptr": "b41"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_2_kernel", [{"Ptr": "b30"}, {"Ptr": "b3"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b41"}, {"Ptr": "b30"}, {"Ptr": "p6"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b44"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b41"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b36"}, {"Ptr": "b44"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_3_kernel", [{"Ptr": "b30"}, {"Ptr": "b4"}, 
{"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b30"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b47"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} 
+ }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b41"}, {"Ptr": "b47"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_4_kernel", [{"Ptr": "b30"}, {"Ptr": "b5"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b47"}, {"Ptr": "b30"}, {"Ptr": "p10"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b47"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b50"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b47"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b44"}, {"Ptr": "b50"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_5_kernel", [{"Ptr": "b30"}, {"Ptr": "b6"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, 
+ "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b30"}, {"Ptr": "p12"}, {"Ptr": "p13"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b53"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b47"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b47"}, {"Ptr": "b53"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_6_kernel", [{"Ptr": "b30"}, {"Ptr": "b7"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b53"}, {"Ptr": "b30"}, {"Ptr": "p14"}, {"Ptr": "p15"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b56"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b53"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b50"}, {"Ptr": "b56"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_7_kernel", [{"Ptr": "b30"}, {"Ptr": "output"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b56"}, {"Ptr": "b30"}, 
{"Ptr": "p16"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b59"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b56"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b53"}, {"Ptr": "b59"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_8_kernel", [{"Ptr": "b30"}, {"Ptr": "b9"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b30"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 
1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b62"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b56"}, {"Ptr": "b62"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_9_kernel", [{"Ptr": "b30"}, {"Ptr": "b10"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b30"}, {"Ptr": "p20"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b65"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b59"}, {"Ptr": 
"b65"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_10_kernel", [{"Ptr": "b30"}, {"Ptr": "b11"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b65"}, {"Ptr": "b30"}, {"Ptr": "p22"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b68"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b65"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b62"}, {"Ptr": "b68"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_11_kernel", [{"Ptr": "b30"}, {"Ptr": "b12"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b68"}, {"Ptr": "b30"}, {"Ptr": "p24"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": 
"b34"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b71"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b68"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b65"}, {"Ptr": "b71"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_12_kernel", [{"Ptr": "b30"}, {"Ptr": "b13"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b71"}, {"Ptr": "b30"}, {"Ptr": "p26"}, {"Ptr": "p27"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b74"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b71"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b68"}, {"Ptr": "b74"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_13_kernel", [{"Ptr": "b30"}, {"Ptr": "b14"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b74"}, {"Ptr": "b30"}, {"Ptr": "p28"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b77"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b74"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b71"}, {"Ptr": "b77"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_squeeze_concatenate_14_kernel", [{"Ptr": "b30"}, {"Ptr": "b15"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b77"}, {"Ptr": "b30"}, {"Ptr": "p30"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b80"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b77"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b74"}, {"Ptr": "b80"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_15_kernel", [{"Ptr": "b30"}, {"Ptr": "b16"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b80"}, {"Ptr": "b30"}, {"Ptr": "p32"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b83"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b80"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b77"}, {"Ptr": "b83"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_16_kernel", [{"Ptr": "b30"}, {"Ptr": "b17"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b83"}, {"Ptr": "b30"}, {"Ptr": "p34"}, {"Ptr": "p35"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b86"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b83"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b80"}, {"Ptr": "b86"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_17_kernel", [{"Ptr": "b30"}, {"Ptr": "b18"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b86"}, {"Ptr": "b30"}, {"Ptr": "p36"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b89"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b86"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b83"}, {"Ptr": "b89"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_18_kernel", [{"Ptr": "b30"}, {"Ptr": "b19"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b89"}, {"Ptr": "b30"}, {"Ptr": "p38"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b92"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b89"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b86"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b86"}, {"Ptr": "b92"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_19_kernel", [{"Ptr": "b30"}, {"Ptr": "b20"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b92"}, {"Ptr": "b30"}, {"Ptr": "p40"}, {"Ptr": "p41"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b95"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b92"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b89"}, {"Ptr": "b95"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_20_kernel", [{"Ptr": "b30"}, {"Ptr": "b21"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b95"}, {"Ptr": "b30"}, {"Ptr": 
"p42"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b98"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b92"}, {"Ptr": "b98"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_21_kernel", [{"Ptr": "b30"}, {"Ptr": "b22"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b98"}, {"Ptr": "b30"}, {"Ptr": "p44"}, {"Ptr": "p45"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b101"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b98"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b95"}, {"Ptr": "b101"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_22_kernel", [{"Ptr": "b30"}, {"Ptr": "b23"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b101"}, {"Ptr": "b30"}, {"Ptr": "p46"}, {"Ptr": "p47"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b104"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b101"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b98"}, {"Ptr": 
"b104"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_23_kernel", [{"Ptr": "b30"}, {"Ptr": "b24"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b104"}, {"Ptr": "b30"}, {"Ptr": "p48"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b107"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b104"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b101"}, {"Ptr": "b107"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_24_kernel", [{"Ptr": "b30"}, {"Ptr": "b25"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b107"}, {"Ptr": "b30"}, {"Ptr": "p50"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b110"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b107"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b104"}, {"Ptr": "b110"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 
1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_25_kernel", [{"Ptr": "b30"}, {"Ptr": "b26"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b110"}, {"Ptr": "b30"}, {"Ptr": "p52"}, {"Ptr": "p53"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": 
"b113"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b110"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b107"}, {"Ptr": "b113"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_26_kernel", [{"Ptr": "b30"}, {"Ptr": "b27"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b113"}, {"Ptr": "b30"}, {"Ptr": "p54"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b34"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b35"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b116"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b113"}, {"Ptr": "b35"}, {"Ptr": "b34"}, {"Ptr": "b30"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b110"}, {"Ptr": "b116"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} 
+ }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_27_kernel", [{"Ptr": "b30"}, {"Ptr": "b28"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 156}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b116"}, {"Ptr": "b30"}, {"Ptr": "p56"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel", [{"Ptr": "b34"}, {"Ptr": "b38"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_1", [{"Ptr": "b35"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_2", [{"Ptr": "b30"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_3", [{"Ptr": "b28"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_4", [{"Ptr": "b27"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_5", [{"Ptr": "b26"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_6", [{"Ptr": "b25"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_7", [{"Ptr": "b24"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_8", [{"Ptr": "b23"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_9", [{"Ptr": "b22"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_10", [{"Ptr": "b21"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_11", [{"Ptr": "b20"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_12", [{"Ptr": "b19"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_13", [{"Ptr": "b18"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_14", [{"Ptr": "b17"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_15", [{"Ptr": "b16"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_16", [{"Ptr": "b15"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_17", [{"Ptr": "b14"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_18", [{"Ptr": "b13"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_19", [{"Ptr": "b12"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_20", [{"Ptr": "b11"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_21", 
[{"Ptr": "b10"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_22", [{"Ptr": "b9"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_23", [{"Ptr": "output"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_24", [{"Ptr": "b7"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_25", [{"Ptr": "b6"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_26", [{"Ptr": "b5"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_27", [{"Ptr": "b4"}, {"Ptr": "b116"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_28_kernel", [{"Ptr": "b116"}, {"Ptr": "b34"}, {"Ptr": "p58"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b113"}, {"Ptr": "b116"}, {"Ptr": "p59"}, {"Ptr": "p60"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b33"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b36"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b41"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b33"}, {"Ptr": "b38"}, {"Ptr": "b36"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b47"}, {"Ptr": "b41"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_29_kernel", [{"Ptr": "b50"}, {"Ptr": "b35"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b50"}, {"Ptr": "p62"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b56"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b62"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b65"}, {"Ptr": "b56"}, {"Ptr": "b38"}, {"Ptr": "b59"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b68"}, {"Ptr": "b62"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_30_kernel", [{"Ptr": "b71"}, {"Ptr": "b30"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b74"}, {"Ptr": "b71"}, {"Ptr": "p64"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b77"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b80"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b83"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b86"}, {"Ptr": "b77"}, {"Ptr": "b38"}, {"Ptr": "b80"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b89"}, {"Ptr": "b83"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_31_kernel", [{"Ptr": "b92"}, {"Ptr": "b28"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b95"}, {"Ptr": "b92"}, {"Ptr": "p66"}, {"Ptr": "p67"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b98"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b101"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b104"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b107"}, {"Ptr": "b98"}, {"Ptr": "b38"}, {"Ptr": "b101"}, {"Ptr": "b86"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b110"}, {"Ptr": "b104"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_32_kernel", [{"Ptr": "b116"}, {"Ptr": "b27"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b113"}, {"Ptr": "b116"}, {"Ptr": "p68"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b33"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b36"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b41"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b33"}, {"Ptr": "b38"}, {"Ptr": "b36"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b53"}, {"Ptr": "b41"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_33_kernel", [{"Ptr": "b44"}, {"Ptr": "b26"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b56"}, 
{"Ptr": "b44"}, {"Ptr": "p70"}, {"Ptr": "p71"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b59"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b71"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b74"}, {"Ptr": "b59"}, {"Ptr": "b38"}, {"Ptr": "b62"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b65"}, {"Ptr": "b71"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_34_kernel", [{"Ptr": "b77"}, {"Ptr": "b25"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b80"}, {"Ptr": "b77"}, {"Ptr": "p72"}, {"Ptr": "p73"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b83"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b92"}, {"Ptr": "b80"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b95"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b86"}, {"Ptr": "b83"}, {"Ptr": "b38"}, {"Ptr": "b92"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b98"}, {"Ptr": "b95"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_35_kernel", [{"Ptr": "b101"}, {"Ptr": "b24"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b104"}, {"Ptr": "b101"}, {"Ptr": "p74"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b113"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b107"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b33"}, {"Ptr": "b116"}, {"Ptr": "b38"}, {"Ptr": "b113"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b36"}, {"Ptr": "b107"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_36_kernel", [{"Ptr": "b41"}, {"Ptr": "b23"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b41"}, {"Ptr": "p76"}, {"Ptr": "p77"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b56"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b50"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b59"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b56"}, {"Ptr": "b38"}, {"Ptr": "b50"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b71"}, {"Ptr": "b59"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_37_kernel", [{"Ptr": "b77"}, {"Ptr": "b22"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b80"}, {"Ptr": "b77"}, {"Ptr": "p78"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b74"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b83"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b92"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "b74"}, {"Ptr": "b38"}, {"Ptr": "b83"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b101"}, {"Ptr": "b92"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_38_kernel", [{"Ptr": "b104"}, {"Ptr": "b21"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b86"}, {"Ptr": "b104"}, {"Ptr": "p80"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b113"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b107"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b41"}, {"Ptr": "b116"}, {"Ptr": "b38"}, {"Ptr": "b113"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b44"}, {"Ptr": "b107"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_39_kernel", [{"Ptr": "b33"}, {"Ptr": "b20"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b56"}, {"Ptr": "b33"}, {"Ptr": "p82"}, {"Ptr": "p83"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, 
+ "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b77"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b80"}, {"Ptr": "b50"}, {"Ptr": "b38"}, {"Ptr": "b59"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b62"}, {"Ptr": "b77"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_40_kernel", [{"Ptr": "b74"}, {"Ptr": "b19"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b83"}, {"Ptr": "b74"}, {"Ptr": "p84"}, {"Ptr": "p85"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b92"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b104"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b86"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "b92"}, {"Ptr": "b38"}, {"Ptr": "b104"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b116"}, {"Ptr": "b86"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_41_kernel", [{"Ptr": "b113"}, {"Ptr": "b18"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b113"}, {"Ptr": "p86"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b33"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b56"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b41"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b33"}, {"Ptr": "b38"}, {"Ptr": "b56"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b59"}, {"Ptr": "b41"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_42_kernel", [{"Ptr": "b77"}, {"Ptr": "b17"}, {"Ptr": 
"b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b74"}, {"Ptr": "b77"}, {"Ptr": "p88"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b83"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b80"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b92"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b104"}, {"Ptr": "b83"}, {"Ptr": "b38"}, {"Ptr": "b80"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b86"}, {"Ptr": "b92"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_43_kernel", [{"Ptr": "b113"}, {"Ptr": "b16"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b113"}, {"Ptr": "p90"}, {"Ptr": "p91"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b95"}, {"Ptr": "b107"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b33"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b56"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b41"}, {"Ptr": "b95"}, {"Ptr": "b38"}, {"Ptr": "b33"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b77"}, {"Ptr": "b56"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_44_kernel", [{"Ptr": "b74"}, {"Ptr": "b15"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b50"}, {"Ptr": "b74"}, {"Ptr": "p92"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b83"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b80"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b92"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b113"}, {"Ptr": "b83"}, {"Ptr": "b38"}, 
{"Ptr": "b80"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b107"}, {"Ptr": "b92"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_45_kernel", [{"Ptr": "b104"}, {"Ptr": "b14"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b95"}, {"Ptr": "b104"}, {"Ptr": "p94"}, {"Ptr": "p95"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b33"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b56"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b74"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b33"}, {"Ptr": "b38"}, {"Ptr": "b56"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b41"}, {"Ptr": "b74"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_46_kernel", [{"Ptr": "b83"}, {"Ptr": "b13"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b80"}, {"Ptr": "b83"}, {"Ptr": "p96"}, {"Ptr": "p97"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b92"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b104"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b95"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b113"}, {"Ptr": "b92"}, {"Ptr": "b38"}, {"Ptr": "b104"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 
1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b33"}, {"Ptr": "b95"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_47_kernel", [{"Ptr": "b56"}, {"Ptr": "b12"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b74"}, {"Ptr": "b56"}, {"Ptr": "p98"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b83"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b80"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b50"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b92"}, {"Ptr": "b83"}, {"Ptr": "b38"}, {"Ptr": "b80"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b104"}, {"Ptr": "b50"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_48_kernel", [{"Ptr": "b95"}, {"Ptr": "b11"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b56"}, {"Ptr": "b95"}, {"Ptr": "p100"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 
512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b74"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b113"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b83"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b80"}, {"Ptr": "b74"}, {"Ptr": "b38"}, {"Ptr": "b113"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b50"}, {"Ptr": "b83"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_49_kernel", [{"Ptr": "b95"}, {"Ptr": "b10"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b56"}, {"Ptr": "b95"}, {"Ptr": "p102"}, {"Ptr": "p103"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b92"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b74"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b113"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b83"}, {"Ptr": "b92"}, {"Ptr": "b38"}, {"Ptr": "b74"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b95"}, {"Ptr": "b113"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_50_kernel", [{"Ptr": "b56"}, {"Ptr": "b9"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b80"}, {"Ptr": "b56"}, {"Ptr": "p104"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b92"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b74"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b113"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b56"}, {"Ptr": "b92"}, {"Ptr": "b38"}, {"Ptr": "b74"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b80"}, {"Ptr": "b113"}, {"Ptr": "b56"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_51_kernel", [{"Ptr": "b83"}, {"Ptr": "output"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b92"}, {"Ptr": "b83"}, {"Ptr": "p106"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b74"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b113"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b83"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b92"}, {"Ptr": "b74"}, {"Ptr": "b38"}, {"Ptr": "b113"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b56"}, {"Ptr": "b83"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_52_kernel", [{"Ptr": "b74"}, {"Ptr": "b7"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b113"}, {"Ptr": "b74"}, {"Ptr": "p108"}, {"Ptr": "p109"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": 
"b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b83"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b74"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b3"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b113"}, {"Ptr": "b83"}, {"Ptr": "b38"}, {"Ptr": "b74"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b92"}, {"Ptr": "b3"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_53_kernel", [{"Ptr": "b83"}, {"Ptr": "b6"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b74"}, {"Ptr": "b83"}, {"Ptr": "p110"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b3"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b83"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b2"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b74"}, {"Ptr": "b3"}, {"Ptr": "b38"}, {"Ptr": "b83"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b113"}, {"Ptr": "b2"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_54_kernel", [{"Ptr": "b83"}, {"Ptr": "b5"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b83"}, {"Ptr": "p112"}, {"Ptr": "p113"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b38"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b3"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b83"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b1"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b3"}, {"Ptr": "b38"}, {"Ptr": "b83"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b74"}, {"Ptr": "b1"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_55_kernel", [{"Ptr": "b83"}, 
{"Ptr": "b4"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b30"}, {"Ptr": "b83"}, {"Ptr": "p114"}, {"Ptr": "p115"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel", [{"Ptr": "b38"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_1", [{"Ptr": "b3"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_2", [{"Ptr": "b1"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_3", [{"Ptr": "b34"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_4", [{"Ptr": "b35"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_5", [{"Ptr": "b28"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_6", [{"Ptr": "b27"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_7", [{"Ptr": "b26"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_8", [{"Ptr": "b25"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_9", [{"Ptr": "b24"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_10", [{"Ptr": "b23"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_11", [{"Ptr": "b22"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_12", 
[{"Ptr": "b21"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_13", [{"Ptr": "b20"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_14", [{"Ptr": "b19"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_15", [{"Ptr": "b18"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_16", [{"Ptr": "b17"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_17", [{"Ptr": "b16"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_18", [{"Ptr": "b15"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_19", [{"Ptr": "b14"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_20", [{"Ptr": "b13"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_21", [{"Ptr": "b12"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 
1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_22", [{"Ptr": "b11"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_23", [{"Ptr": "b10"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_24", [{"Ptr": "b9"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_25", [{"Ptr": "output"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_26", [{"Ptr": "b7"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_27", [{"Ptr": "b6"}, {"Ptr": "b30"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_28_kernel", [{"Ptr": "b83"}, {"Ptr": "b38"}, {"Ptr": "p116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b30"}, {"Ptr": "b83"}, {"Ptr": "p117"}, {"Ptr": "p118"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b2"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b47"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b68"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b89"}, {"Ptr": "b47"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_29_kernel", [{"Ptr": "b110"}, {"Ptr": "b3"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b110"}, {"Ptr": "p120"}, {"Ptr": "p121"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b65"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b98"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b65"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b71"}, {"Ptr": "b98"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_30_kernel", [{"Ptr": "b101"}, {"Ptr": "b1"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b101"}, {"Ptr": "p122"}, {"Ptr": "p123"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b116"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b62"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b86"}, {"Ptr": "b116"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_31_kernel", [{"Ptr": "b77"}, {"Ptr": "b34"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b77"}, {"Ptr": "p124"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b41"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b33"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b104"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b41"}, {"Ptr": "b59"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b50"}, {"Ptr": "b33"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_32_kernel", [{"Ptr": "b95"}, {"Ptr": "b35"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b80"}, {"Ptr": "b95"}, {"Ptr": "p126"}, {"Ptr": "p127"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b56"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b92"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b113"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b56"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b74"}, {"Ptr": "b92"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_33_kernel", [{"Ptr": "b83"}, {"Ptr": "b28"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b30"}, 
{"Ptr": "b83"}, {"Ptr": "p128"}, {"Ptr": "p129"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b2"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b47"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b110"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b53"}, {"Ptr": "b47"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_34_kernel", [{"Ptr": "b68"}, {"Ptr": "b27"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b65"}, {"Ptr": "b68"}, {"Ptr": "p130"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b98"}, {"Ptr": "b65"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b101"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b98"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b36"}, {"Ptr": "b101"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_35_kernel", [{"Ptr": "b62"}, {"Ptr": "b26"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b116"}, {"Ptr": "b62"}, {"Ptr": "p132"}, {"Ptr": "p133"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b77"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b107"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b77"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b41"}, {"Ptr": "b107"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_36_kernel", [{"Ptr": "b33"}, {"Ptr": "b25"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b95"}, {"Ptr": "b33"}, {"Ptr": "p134"}, {"Ptr": "p135"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b80"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b104"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b56"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b80"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b92"}, {"Ptr": "b104"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_37_kernel", [{"Ptr": "b83"}, {"Ptr": "b24"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b30"}, {"Ptr": "b83"}, {"Ptr": "p136"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b113"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b2"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b47"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b113"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b68"}, {"Ptr": "b2"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_38_kernel", [{"Ptr": "b65"}, {"Ptr": "b23"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b110"}, {"Ptr": "b65"}, {"Ptr": "p138"}, {"Ptr": "p139"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b98"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b101"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b98"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b116"}, {"Ptr": "b101"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_39_kernel", [{"Ptr": "b44"}, {"Ptr": "b22"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b77"}, {"Ptr": "b44"}, {"Ptr": "p140"}, {"Ptr": "p141"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, 
+ "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b107"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b33"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b107"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b59"}, {"Ptr": "b33"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_40_kernel", [{"Ptr": "b80"}, {"Ptr": "b21"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b104"}, {"Ptr": "b80"}, {"Ptr": "p142"}, {"Ptr": "p143"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b83"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b30"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b56"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b83"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b113"}, {"Ptr": "b30"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_41_kernel", [{"Ptr": "b2"}, {"Ptr": "b20"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b65"}, {"Ptr": "b2"}, {"Ptr": "p144"}, {"Ptr": "p145"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b110"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b47"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b98"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b110"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b101"}, {"Ptr": "b47"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_42_kernel", [{"Ptr": "b44"}, {"Ptr": "b19"}, {"Ptr": 
"b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b77"}, {"Ptr": "b44"}, {"Ptr": "p146"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b107"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b33"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b62"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b80"}, {"Ptr": "b107"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_43_kernel", [{"Ptr": "b104"}, {"Ptr": "b18"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b95"}, {"Ptr": "b104"}, {"Ptr": "p148"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b95"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b83"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b30"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b2"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b83"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b65"}, {"Ptr": "b30"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_44_kernel", [{"Ptr": "b56"}, {"Ptr": "b17"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b110"}, {"Ptr": "b56"}, {"Ptr": "p150"}, {"Ptr": "p151"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b47"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b44"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b77"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b47"}, 
{"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b98"}, {"Ptr": "b44"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_45_kernel", [{"Ptr": "b62"}, {"Ptr": "b16"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b62"}, {"Ptr": "p152"}, {"Ptr": "p153"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b104"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b95"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b33"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b104"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b83"}, {"Ptr": "b95"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_46_kernel", [{"Ptr": "b30"}, {"Ptr": "b15"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", 
[{"Ptr": "b56"}, {"Ptr": "b30"}, {"Ptr": "p154"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b110"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b2"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b47"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b110"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b44"}, {"Ptr": "b2"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_47_kernel", [{"Ptr": "b62"}, {"Ptr": "b14"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b62"}, {"Ptr": "p156"}, {"Ptr": "p157"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b77"}, {"Ptr": 
"b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b104"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b77"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b30"}, {"Ptr": "b104"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_48_kernel", [{"Ptr": "b56"}, {"Ptr": "b13"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b33"}, {"Ptr": "b56"}, {"Ptr": "p158"}, {"Ptr": "p159"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 
1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b110"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b2"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b110"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b107"}, {"Ptr": "b2"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_49_kernel", [{"Ptr": "b47"}, {"Ptr": "b12"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b77"}, {"Ptr": "b47"}, {"Ptr": "p160"}, {"Ptr": "p161"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b104"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b56"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b33"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b104"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b95"}, {"Ptr": "b56"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_50_kernel", [{"Ptr": "b110"}, {"Ptr": "b11"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b2"}, {"Ptr": "b110"}, {"Ptr": "p162"}, {"Ptr": "p163"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b47"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b77"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b47"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b104"}, {"Ptr": "b77"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_51_kernel", [{"Ptr": "b56"}, {"Ptr": "b10"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b110"}, {"Ptr": "b56"}, {"Ptr": "p164"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b2"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b33"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b47"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b77"}, {"Ptr": "b33"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_52_kernel", [{"Ptr": "b56"}, {"Ptr": "b9"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b110"}, {"Ptr": "b56"}, {"Ptr": "p166"}, {"Ptr": "p167"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b2"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b33"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b62"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b56"}, {"Ptr": "b2"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_53_kernel", [{"Ptr": "b110"}, {"Ptr": "output"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b47"}, {"Ptr": "b110"}, {"Ptr": "p168"}, {"Ptr": "p169"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b2"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b110"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b62"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b47"}, {"Ptr": "b2"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_54_kernel", [{"Ptr": "b33"}, {"Ptr": "b7"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b62"}, {"Ptr": "b33"}, {"Ptr": "p170"}, {"Ptr": "p171"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b5"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b4"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b2"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b33"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b4"}, {"Ptr": "b5"}, {"Ptr": "b2"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b110"}, {"Ptr": "b33"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_55_kernel", [{"Ptr": "b2"}, {"Ptr": "b6"}, {"Ptr": "b110"}], { 
+ "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b33"}, {"Ptr": "b2"}, {"Ptr": "p172"}, {"Ptr": "p173"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel", [{"Ptr": "b5"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_1", [{"Ptr": "b4"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_2", [{"Ptr": "b38"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_3", [{"Ptr": "b3"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_4", [{"Ptr": "b1"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_5", [{"Ptr": "b34"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_6", [{"Ptr": "b35"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_7", [{"Ptr": "b28"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_8", [{"Ptr": "b27"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_9", [{"Ptr": "b26"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_10", [{"Ptr": "b25"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_11", [{"Ptr": "b24"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_12", 
[{"Ptr": "b23"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_13", [{"Ptr": "b22"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_14", [{"Ptr": "b21"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_15", [{"Ptr": "b20"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_16", [{"Ptr": "b19"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_17", [{"Ptr": "b18"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_18", [{"Ptr": "b17"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_19", [{"Ptr": "b16"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_20", [{"Ptr": "b15"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_21", [{"Ptr": "b14"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 
1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_22", [{"Ptr": "b13"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_23", [{"Ptr": "b12"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_24", [{"Ptr": "b11"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_25", [{"Ptr": "b10"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_26", [{"Ptr": "b9"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_27", [{"Ptr": "output"}, {"Ptr": "b33"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_28_kernel", [{"Ptr": "b2"}, {"Ptr": "b5"}, {"Ptr": "p174"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b33"}, {"Ptr": "b2"}, {"Ptr": "p175"}, {"Ptr": "p176"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b89"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b71"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b62"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b86"}, {"Ptr": "b89"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_29_kernel", [{"Ptr": "b50"}, {"Ptr": "b4"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b74"}, {"Ptr": "b50"}, {"Ptr": "p178"}, {"Ptr": "p179"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b53"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b36"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b41"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b53"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b92"}, {"Ptr": "b36"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_30_kernel", [{"Ptr": "b68"}, {"Ptr": "b38"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b116"}, {"Ptr": "b68"}, {"Ptr": "p180"}, {"Ptr": "p181"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b113"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b101"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b59"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b80"}, {"Ptr": "b113"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_31_kernel", [{"Ptr": "b65"}, {"Ptr": "b3"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b98"}, {"Ptr": "b65"}, {"Ptr": "p182"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b83"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b44"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b30"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b83"}, {"Ptr": "b101"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b107"}, {"Ptr": "b44"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_32_kernel", [{"Ptr": "b95"}, {"Ptr": "b1"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b104"}, {"Ptr": "b95"}, {"Ptr": "p184"}, {"Ptr": "p185"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b77"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b56"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b47"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b77"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b110"}, {"Ptr": "b56"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_33_kernel", [{"Ptr": "b2"}, {"Ptr": "b34"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b33"}, 
{"Ptr": "b2"}, {"Ptr": "p186"}, {"Ptr": "p187"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b89"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b62"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b74"}, {"Ptr": "b89"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_34_kernel", [{"Ptr": "b71"}, {"Ptr": "b35"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b71"}, {"Ptr": "p188"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b36"}, {"Ptr": "b53"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b68"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b116"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b36"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b41"}, {"Ptr": "b68"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_35_kernel", [{"Ptr": "b59"}, {"Ptr": "b28"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b113"}, {"Ptr": "b59"}, {"Ptr": "p190"}, {"Ptr": "p191"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b65"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b98"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b101"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b65"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": 
"b83"}, {"Ptr": "b98"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_36_kernel", [{"Ptr": "b44"}, {"Ptr": "b27"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b95"}, {"Ptr": "b44"}, {"Ptr": "p192"}, {"Ptr": "p193"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b104"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b30"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b77"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b104"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b56"}, {"Ptr": "b30"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_37_kernel", [{"Ptr": "b2"}, {"Ptr": "b26"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b33"}, {"Ptr": "b2"}, {"Ptr": "p194"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b47"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b62"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b89"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b47"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b71"}, {"Ptr": "b62"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_38_kernel", [{"Ptr": "b53"}, {"Ptr": "b25"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b50"}, {"Ptr": "b53"}, {"Ptr": "p196"}, {"Ptr": "p197"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b36"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b68"}, 
{"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b36"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b113"}, {"Ptr": "b68"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_39_kernel", [{"Ptr": "b116"}, {"Ptr": "b24"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b65"}, {"Ptr": "b116"}, {"Ptr": "p198"}, {"Ptr": "p199"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b98"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b44"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b98"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b101"}, {"Ptr": "b44"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_squeeze_concatenate_40_kernel", [{"Ptr": "b104"}, {"Ptr": "b23"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b30"}, {"Ptr": "b104"}, {"Ptr": "p200"}, {"Ptr": "p201"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b2"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b33"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b77"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b2"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b47"}, {"Ptr": "b33"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_41_kernel", [{"Ptr": "b62"}, {"Ptr": "b22"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b62"}, {"Ptr": "p202"}, {"Ptr": "p203"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b50"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b89"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b50"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b68"}, {"Ptr": "b89"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_42_kernel", [{"Ptr": "b116"}, {"Ptr": "b21"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b65"}, {"Ptr": "b116"}, {"Ptr": "p204"}, {"Ptr": "p205"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b98"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b59"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b104"}, {"Ptr": "b98"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_43_kernel", [{"Ptr": "b30"}, {"Ptr": "b20"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b95"}, {"Ptr": "b30"}, {"Ptr": "p206"}, {"Ptr": "p207"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b2"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b33"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b2"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b53"}, {"Ptr": "b33"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_44_kernel", [{"Ptr": "b77"}, {"Ptr": "b19"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b50"}, {"Ptr": "b77"}, {"Ptr": "p208"}, {"Ptr": "p209"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b89"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b116"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b65"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b89"}, {"Ptr": "b62"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b36"}, {"Ptr": "b116"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_45_kernel", [{"Ptr": "b59"}, {"Ptr": "b18"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b98"}, {"Ptr": "b59"}, {"Ptr": "p210"}, {"Ptr": "p211"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b95"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b30"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b2"}, {"Ptr": "b95"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_46_kernel", [{"Ptr": "b33"}, {"Ptr": "b17"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b77"}, {"Ptr": "b33"}, {"Ptr": 
"p212"}, {"Ptr": "p213"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b50"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b62"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b89"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b50"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b116"}, {"Ptr": "b62"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_47_kernel", [{"Ptr": "b59"}, {"Ptr": "b16"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b98"}, {"Ptr": "b59"}, {"Ptr": "p214"}, {"Ptr": "p215"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b65"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b30"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b65"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b33"}, {"Ptr": "b30"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_48_kernel", [{"Ptr": "b77"}, {"Ptr": "b15"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b77"}, {"Ptr": "p216"}, {"Ptr": "p217"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b50"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b62"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b50"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b98"}, {"Ptr": "b62"}, 
{"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_49_kernel", [{"Ptr": "b89"}, {"Ptr": "b14"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b65"}, {"Ptr": "b89"}, {"Ptr": "p218"}, {"Ptr": "p219"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b77"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b30"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b95"}, {"Ptr": "b77"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_50_kernel", [{"Ptr": "b50"}, {"Ptr": "b13"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b62"}, {"Ptr": "b50"}, {"Ptr": "p220"}, {"Ptr": "p221"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": 
"b7"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b89"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b65"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b89"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b30"}, {"Ptr": "b65"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_51_kernel", [{"Ptr": "b77"}, {"Ptr": "b12"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b50"}, {"Ptr": "b77"}, {"Ptr": "p222"}, {"Ptr": "p223"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b44"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b89"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b62"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b65"}, {"Ptr": "b44"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_52_kernel", [{"Ptr": "b77"}, {"Ptr": "b11"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b50"}, {"Ptr": "b77"}, {"Ptr": "p224"}, {"Ptr": "p225"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b62"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b59"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b77"}, {"Ptr": "b62"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_53_kernel", 
[{"Ptr": "b50"}, {"Ptr": "b10"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b89"}, {"Ptr": "b50"}, {"Ptr": "p226"}, {"Ptr": "p227"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "b6"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b62"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b59"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b89"}, {"Ptr": "b62"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_54_kernel", [{"Ptr": "b44"}, {"Ptr": "b9"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b59"}, {"Ptr": "b44"}, {"Ptr": "p228"}, {"Ptr": "p229"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b7"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": 
"b6"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b44"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "b62"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b50"}, {"Ptr": "b44"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_55_kernel", [{"Ptr": "b62"}, {"Ptr": "output"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b62"}, {"Ptr": "p230"}, {"Ptr": "p231"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel", [{"Ptr": "b7"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_1", [{"Ptr": "b6"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_2", [{"Ptr": "b5"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_3", [{"Ptr": "b4"}, {"Ptr": "b107"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_4", [{"Ptr": "b38"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_5", [{"Ptr": "b3"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_6", [{"Ptr": "b1"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_7", [{"Ptr": "b34"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_8", [{"Ptr": "b35"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_9", [{"Ptr": "b28"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_10", [{"Ptr": "b27"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_11", [{"Ptr": "b26"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_12", [{"Ptr": "b25"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_13", [{"Ptr": "b24"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_14", [{"Ptr": "b23"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_15", [{"Ptr": "b22"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_16", [{"Ptr": "b21"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_17", 
[{"Ptr": "b20"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_18", [{"Ptr": "b19"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_19", [{"Ptr": "b18"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_20", [{"Ptr": "b17"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_21", [{"Ptr": "b16"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, 
+ {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_22", [{"Ptr": "b15"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_23", [{"Ptr": "b14"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_24", [{"Ptr": "b13"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_25", [{"Ptr": "b12"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_26", [{"Ptr": "b11"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_2bcd01aae566f75b__kernel_27", [{"Ptr": "b10"}, {"Ptr": "b44"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_28_kernel", [{"Ptr": "b62"}, {"Ptr": "b7"}, {"Ptr": "p232"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b62"}, {"Ptr": "p233"}, {"Ptr": "p234"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b86"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b92"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b59"}, {"Ptr": "p235"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b80"}, {"Ptr": "b86"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_29_kernel", [{"Ptr": "b107"}, {"Ptr": "b6"}, {"Ptr": "b80"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b110"}, {"Ptr": "b107"}, 
{"Ptr": "p236"}, {"Ptr": "p237"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b74"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b41"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b83"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b74"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b56"}, {"Ptr": "b41"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_30_kernel", [{"Ptr": "b71"}, {"Ptr": "b5"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b113"}, {"Ptr": "b71"}, {"Ptr": "p238"}, {"Ptr": "p239"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b101"}, {"Ptr": "b113"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b47"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b68"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b101"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b104"}, {"Ptr": "b47"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_31_kernel", [{"Ptr": "b53"}, {"Ptr": "b4"}, {"Ptr": "b104"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b36"}, {"Ptr": "b53"}, {"Ptr": "p240"}, {"Ptr": "p241"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b116"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b33"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b2"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b98"}, {"Ptr": "b116"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_32_kernel", [{"Ptr": "b95"}, {"Ptr": "b38"}, {"Ptr": "b98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b30"}, {"Ptr": "b95"}, {"Ptr": "p242"}, {"Ptr": "p243"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b65"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b77"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b89"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b65"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b50"}, {"Ptr": "b77"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_33_kernel", [{"Ptr": "b62"}, {"Ptr": "b3"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b62"}, {"Ptr": "p244"}, {"Ptr": "p245"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b86"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b107"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b59"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b110"}, {"Ptr": "b86"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_34_kernel", [{"Ptr": "b92"}, {"Ptr": "b1"}, {"Ptr": "b110"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b74"}, {"Ptr": "b92"}, {"Ptr": "p246"}, {"Ptr": "p247"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b41"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b71"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b113"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b41"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b83"}, {"Ptr": "b71"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_35_kernel", [{"Ptr": "b101"}, {"Ptr": "b34"}, {"Ptr": "b83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b47"}, {"Ptr": "b101"}, {"Ptr": "p248"}, {"Ptr": "p249"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b47"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b53"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b36"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b68"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b53"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b2"}, {"Ptr": "b36"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_36_kernel", [{"Ptr": "b116"}, {"Ptr": "b35"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b95"}, {"Ptr": "b116"}, {"Ptr": "p250"}, {"Ptr": "p251"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b30"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b33"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b65"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b30"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b77"}, {"Ptr": "b33"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_37_kernel", [{"Ptr": "b62"}, {"Ptr": "b28"}, {"Ptr": "b77"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b62"}, {"Ptr": "p252"}, {"Ptr": "p253"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b89"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b59"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b86"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b89"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b92"}, {"Ptr": "b59"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_38_kernel", [{"Ptr": 
"b74"}, {"Ptr": "b27"}, {"Ptr": "b92"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b74"}, {"Ptr": "p254"}, {"Ptr": "p255"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b41"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b71"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b101"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b41"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b47"}, {"Ptr": "b71"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_39_kernel", [{"Ptr": "b113"}, {"Ptr": "b26"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b113"}, {"Ptr": "p256"}, {"Ptr": "p257"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", 
[{"Ptr": "output"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b36"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b116"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b36"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b68"}, {"Ptr": "b116"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_40_kernel", [{"Ptr": "b30"}, {"Ptr": "b25"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b33"}, {"Ptr": "b30"}, {"Ptr": "p258"}, {"Ptr": "p259"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b44"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": 
"b65"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b62"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b89"}, {"Ptr": "b44"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_41_kernel", [{"Ptr": "b59"}, {"Ptr": "b24"}, {"Ptr": "b89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b74"}, {"Ptr": "b59"}, {"Ptr": "p260"}, {"Ptr": "p261"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b107"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b86"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b41"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b107"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b71"}, {"Ptr": "b86"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_42_kernel", [{"Ptr": "b113"}, {"Ptr": "b23"}, {"Ptr": "b71"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b113"}, {"Ptr": "p262"}, {"Ptr": "p263"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b101"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b36"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b116"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b101"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b30"}, {"Ptr": "b36"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_43_kernel", [{"Ptr": "b33"}, {"Ptr": "b22"}, {"Ptr": "b30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b95"}, {"Ptr": "b33"}, {"Ptr": "p264"}, {"Ptr": "p265"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b62"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b44"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b59"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b62"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b74"}, {"Ptr": "b44"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_44_kernel", [{"Ptr": "b65"}, {"Ptr": "b21"}, {"Ptr": "b74"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b65"}, {"Ptr": "p266"}, {"Ptr": "p267"}], { + 
"grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b86"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b113"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b53"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b86"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b41"}, {"Ptr": "b113"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_45_kernel", [{"Ptr": "b101"}, {"Ptr": "b20"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b36"}, {"Ptr": "b101"}, {"Ptr": "p268"}, {"Ptr": "p269"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b33"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b95"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b116"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b33"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b62"}, {"Ptr": "b95"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_46_kernel", [{"Ptr": "b44"}, {"Ptr": "b19"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b65"}, {"Ptr": "b44"}, {"Ptr": "p270"}, {"Ptr": "p271"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b107"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b59"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b86"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b107"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b113"}, {"Ptr": "b59"}, 
{"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_47_kernel", [{"Ptr": "b101"}, {"Ptr": "b18"}, {"Ptr": "b113"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b36"}, {"Ptr": "b101"}, {"Ptr": "p272"}, {"Ptr": "p273"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b53"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b33"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b95"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b53"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b44"}, {"Ptr": "b33"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_48_kernel", [{"Ptr": "b65"}, {"Ptr": "b17"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b116"}, {"Ptr": "b65"}, {"Ptr": "p274"}, {"Ptr": "p275"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", 
[{"Ptr": "b9"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b107"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b59"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b101"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b107"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b36"}, {"Ptr": "b59"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_49_kernel", [{"Ptr": "b86"}, {"Ptr": "b16"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b86"}, {"Ptr": "p276"}, {"Ptr": "p277"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b33"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b65"}, {"Ptr": "b53"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b116"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b33"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b95"}, {"Ptr": "b65"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_50_kernel", [{"Ptr": "b107"}, {"Ptr": "b15"}, {"Ptr": "b95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b59"}, {"Ptr": "b107"}, {"Ptr": "p278"}, {"Ptr": "p279"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b86"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b53"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b101"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b86"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b33"}, {"Ptr": "b53"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_squeeze_concatenate_51_kernel", [{"Ptr": "b65"}, {"Ptr": "b14"}, {"Ptr": "b33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b65"}, {"Ptr": "p280"}, {"Ptr": "p281"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b116"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b86"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b59"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b53"}, {"Ptr": "b116"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_52_kernel", [{"Ptr": "b65"}, {"Ptr": "b13"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b107"}, {"Ptr": "b65"}, {"Ptr": "p282"}, {"Ptr": "p283"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b101"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b59"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b116"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b101"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b65"}, {"Ptr": "b59"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_53_kernel", [{"Ptr": "b107"}, {"Ptr": "b12"}, {"Ptr": "b65"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b86"}, {"Ptr": "b107"}, {"Ptr": "p284"}, {"Ptr": "p285"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b101"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b59"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b107"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b101"}, {"Ptr": "b116"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b86"}, {"Ptr": "b59"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_54_kernel", [{"Ptr": "b116"}, {"Ptr": "b11"}, {"Ptr": "b86"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b101"}, {"Ptr": "b116"}, {"Ptr": "p286"}, {"Ptr": "p287"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel", [{"Ptr": "b9"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_1", [{"Ptr": "output"}, {"Ptr": "b101"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_2", [{"Ptr": "b59"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_kernel_3", [{"Ptr": "b116"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_multiply_sigmoid_tanh_multiply_add_kernel", [{"Ptr": "b101"}, {"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "b59"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_sigmoid_tanh_multiply_kernel", [{"Ptr": "b107"}, {"Ptr": "b116"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_concatenate_55_kernel", [{"Ptr": "b59"}, {"Ptr": "b10"}, {"Ptr": "b107"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "b116"}, {"Ptr": "b59"}, {"Ptr": "p288"}, {"Ptr": "p289"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_split_sigmoid_sigmoid_multiply_sigmoid_tanh_multiply_add_tanh_multiply_sta_4e0306112785fa15__kernel", [{"Ptr": "b9"}, {"Ptr": "b116"}, {"Ptr": "b101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_2_kernel", [{"Ptr": "output"}, {"Ptr": "b9"}, {"Ptr": "p290"}, {"Ptr": "p291"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_matmul.json b/machine_interface/tests/data/cuda/test_gpu_matmul.json new file mode 100644 index 00000000..f75860c4 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_matmul.json @@ -0,0 +1,24 @@ +{ + "modules": [ + { "module_name": "kernelCheck.cubin", "path": "kernelCheck.cubin" } + ], + "kernels": [ + { "module_name": "kernelCheck.cubin", "kernel_name": "matmul" } + ], + "blueprint": { + "inputs": ["A", "B"], + "buffers": {"C": {"Absolute": 64}}, + "outputs": ["C"], + "control_flow": [ + {"ExecKernel": ["matmul", [{"Ptr": "A"}, {"Ptr": "B"}, {"Ptr": "C"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 4}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_minimal.json b/machine_interface/tests/data/cuda/test_gpu_minimal.json new file mode 100644 index 00000000..569ac795 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_minimal.json @@ -0,0 +1,25 @@ +{ + "modules": [ + { "module_name": "kernelCheck.cubin", "path": "kernelCheck.cubin" }, + { "module_name": "kernelCheck.ptx", "path": "kernelCheck.ptx" } + ], + "kernels": [ + { "module_name": "kernelCheck.ptx", "kernel_name": "nothing" } + ], + "blueprint": { + "inputs": [], + "buffers": {}, + "outputs": [], + "control_flow": [ + {"ExecKernel": ["nothing", [], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_mobilenetv2.json b/machine_interface/tests/data/cuda/test_gpu_mobilenetv2.json new file mode 100644 index 00000000..dfe5d96b --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_mobilenetv2.json @@ -0,0 +1,540 @@ +{ + "modules": [{"module_name": "mobilenetv2.cubin", "path": "cuda/mobilenetv2.cubin"}], + "kernels": [ + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_1_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_2_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_3_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_4_kernel"}, + {"module_name": 
"mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_5_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_6_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_1_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_2_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_3_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_4_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_10_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_11_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_12_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_13_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_14_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_15_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_16_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_17_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_1_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_2_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_3_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": 
"tvmgen_default_fused_nn_conv2d_add_clip_4_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_5_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_6_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_7_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_8_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_9_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_clip_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel"}, + {"module_name": "mobilenetv2.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel_1"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105"], + "buffers": {"output": {"Absolute": 4000}, "b3": {"Absolute": 1806336}, "b6": {"Absolute": 4816896}, "b19": 
{"Absolute": 1806336}, "b28": {"Absolute": 100352}, "b35": {"Absolute": 100352}, "b110": {"Absolute": 5120}, "b114": {"Absolute": 8192}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_kernel", [{"Ptr": "b3"}, {"Ptr": "input"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 16}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_1_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p2"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 112}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p4"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 16}, + "block_dim_y": {"Absolute": 8}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_2_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p6"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 16}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_3_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 96}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_1_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p10"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 3}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_4_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p12"}, {"Ptr": "p13"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 6}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 6}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_5_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p14"}, {"Ptr": "p15"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 28}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p16"}, {"Ptr": "p17"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_4_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 6}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 6}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_6_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p20"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 144}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_2_kernel", [{"Ptr": "b28"}, {"Ptr": "b3"}, {"Ptr": "p22"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_7_kernel", [{"Ptr": "b19"}, {"Ptr": "b28"}, {"Ptr": "p24"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 6}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p26"}, {"Ptr": "p27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 96}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_1_kernel", [{"Ptr": "b35"}, {"Ptr": "b3"}, {"Ptr": "p28"}, {"Ptr": "p29"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 2}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_conv2d_add_clip_7_kernel", [{"Ptr": "b19"}, {"Ptr": "b35"}, {"Ptr": "p30"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 6}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p32"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 96}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_1_kernel", [{"Ptr": "b28"}, {"Ptr": "b3"}, {"Ptr": "p34"}, {"Ptr": "p35"}, {"Ptr": "b35"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 2}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_7_kernel", [{"Ptr": "b19"}, {"Ptr": "b28"}, {"Ptr": "p36"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 6}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_9_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p38"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 2}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_3_kernel", [{"Ptr": 
"b35"}, {"Ptr": "b3"}, {"Ptr": "p40"}, {"Ptr": "p41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_10_kernel", [{"Ptr": "b19"}, {"Ptr": "b35"}, {"Ptr": "p42"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 2}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_11_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p44"}, {"Ptr": "p45"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 384}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_2_kernel", [{"Ptr": "b28"}, {"Ptr": "b3"}, {"Ptr": "p46"}, {"Ptr": "p47"}, {"Ptr": "b35"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_10_kernel", [{"Ptr": "b19"}, {"Ptr": "b28"}, {"Ptr": "p48"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 2}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_11_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p50"}, {"Ptr": "p51"}], 
{ + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 384}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_2_kernel", [{"Ptr": "b35"}, {"Ptr": "b3"}, {"Ptr": "p52"}, {"Ptr": "p53"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_10_kernel", [{"Ptr": "b19"}, {"Ptr": "b35"}, {"Ptr": "p54"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 2}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_11_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p56"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 384}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_2_kernel", [{"Ptr": "b28"}, {"Ptr": "b3"}, {"Ptr": "p58"}, {"Ptr": "p59"}, {"Ptr": "b35"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_10_kernel", [{"Ptr": "b19"}, {"Ptr": "b28"}, {"Ptr": "p60"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 2}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_11_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p62"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 384}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_4_kernel", [{"Ptr": "b35"}, {"Ptr": "b3"}, {"Ptr": "p64"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_12_kernel", [{"Ptr": "b19"}, {"Ptr": "b35"}, {"Ptr": "p66"}, {"Ptr": "p67"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_13_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p68"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 144}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_3_kernel", [{"Ptr": "b28"}, {"Ptr": "b3"}, {"Ptr": "p70"}, {"Ptr": "p71"}, {"Ptr": "b35"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 
6}, + "block_dim_x": {"Absolute": 2}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_12_kernel", [{"Ptr": "b19"}, {"Ptr": "b28"}, {"Ptr": "p72"}, {"Ptr": "p73"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_13_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p74"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 144}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_3_kernel", [{"Ptr": "b35"}, {"Ptr": "b3"}, {"Ptr": "p76"}, {"Ptr": "p77"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 6}, + "block_dim_x": {"Absolute": 2}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_12_kernel", [{"Ptr": "b19"}, {"Ptr": "b35"}, {"Ptr": "p78"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_14_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p80"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 24}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": 
{"Absolute": 7}, + "block_dim_z": {"Absolute": 12}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_5_kernel", [{"Ptr": "b28"}, {"Ptr": "b3"}, {"Ptr": "p82"}, {"Ptr": "p83"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 20}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_15_kernel", [{"Ptr": "b19"}, {"Ptr": "b28"}, {"Ptr": "p84"}, {"Ptr": "p85"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_16_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p86"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 48}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 5}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_4_kernel", [{"Ptr": "b35"}, {"Ptr": "b3"}, {"Ptr": "p88"}, {"Ptr": "p89"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_15_kernel", [{"Ptr": "b19"}, {"Ptr": "b35"}, {"Ptr": "p90"}, {"Ptr": "p91"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_16_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p92"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 48}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 5}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_4_kernel", [{"Ptr": "b28"}, {"Ptr": "b3"}, {"Ptr": "p94"}, {"Ptr": "p95"}, {"Ptr": "b35"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 4}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_15_kernel", [{"Ptr": "b19"}, {"Ptr": "b28"}, {"Ptr": "p96"}, {"Ptr": "p97"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_clip_16_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p98"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 48}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 5}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_6_kernel", [{"Ptr": "b35"}, {"Ptr": "b3"}, {"Ptr": "p100"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 5}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_conv2d_add_clip_17_kernel", [{"Ptr": "b19"}, {"Ptr": "b35"}, {"Ptr": "p102"}, {"Ptr": "p103"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 20}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel", [{"Ptr": "b114"}, {"Ptr": "b19"}], { + "grid_dim_x": {"Absolute": 40}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel_1", [{"Ptr": "b110"}, {"Ptr": "b114"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "output"}, {"Ptr": "b110"}, {"Ptr": "p104"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 1000}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_resnet101.json b/machine_interface/tests/data/cuda/test_gpu_resnet101.json new file mode 100644 index 00000000..e5426c3b --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_resnet101.json @@ -0,0 +1,1560 @@ +{ + "modules": [{"module_name": "resnet101.cubin", "path": "cuda/resnet101.cubin"}], + "kernels": [ + {"module_name": "resnet101.cubin", "kernel_name": 
"tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_1_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_2_kernel"}, + {"module_name": "resnet101.cubin", 
"kernel_name": "tvmgen_default_fused_nn_conv2d_add_3_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_10_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_4_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_6_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_7_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_9_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, 
+ {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel_1"}, + {"module_name": "resnet101.cubin", "kernel_name": "tvmgen_default_fused_nn_max_pool2d_kernel"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", "p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209"], + "buffers": {"output": {"Absolute": 4000}, "b3": {"Absolute": 3211264}, 
"b4": {"Absolute": 3211264}, "b9": {"Absolute": 3211264}, "b196": {"Absolute": 100352}, "b205": {"Absolute": 100352}, "b217": {"Absolute": 1806336}, "b218": {"Absolute": 1806336}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel", [{"Ptr": "b3"}, {"Ptr": "input"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 112}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 16}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_max_pool2d_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p2"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": 
{"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p6"}, {"Ptr": "p7"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p10"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p13"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p14"}, {"Ptr": "p15"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p16"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p20"}, {"Ptr": "p21"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p22"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_4_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p24"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 14}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p28"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 4}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p26"}, {"Ptr": "p27"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p30"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": 
{"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p34"}, {"Ptr": "p35"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p36"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p38"}], { + "grid_dim_x": 
{"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p40"}, {"Ptr": "p41"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p42"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, 
{"Ptr": "p44"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p45"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p46"}, {"Ptr": "p47"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_6_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p48"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 2}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_7_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p50"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p54"}, {"Ptr": "p55"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p52"}, {"Ptr": "p53"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p56"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p58"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": 
"b218"}, {"Ptr": "p59"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p60"}, {"Ptr": "p61"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p62"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p64"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p66"}, {"Ptr": "p67"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p68"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p71"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p72"}, {"Ptr": "p73"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p74"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p76"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p77"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p78"}, {"Ptr": "p79"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p80"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p83"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p84"}, {"Ptr": "p85"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p86"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p88"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": 
{"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p90"}, {"Ptr": "p91"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p92"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p94"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": 
{"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p95"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p96"}, {"Ptr": "p97"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p98"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p102"}, {"Ptr": "p103"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p104"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p106"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p108"}, {"Ptr": "p109"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p110"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, 
{"Ptr": "b217"}, {"Ptr": "p112"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p113"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p114"}, {"Ptr": "p115"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p116"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p118"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p120"}, {"Ptr": "p121"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p122"}, {"Ptr": "p123"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p124"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p126"}, {"Ptr": "p127"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p128"}, {"Ptr": "p129"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p130"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p132"}, {"Ptr": "p133"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p134"}, {"Ptr": "p135"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p136"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p138"}, {"Ptr": "p139"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p140"}, {"Ptr": "p141"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, 
+ "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p142"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p143"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p144"}, {"Ptr": "p145"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p146"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p148"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p150"}, {"Ptr": "p151"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p152"}, {"Ptr": "p153"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": 
{"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p154"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p156"}, {"Ptr": "p157"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p158"}, {"Ptr": "p159"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": 
"b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p160"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p161"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p162"}, {"Ptr": "p163"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p164"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p166"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p167"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p168"}, {"Ptr": "p169"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p170"}, {"Ptr": "p171"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p172"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b218"}, {"Ptr": "p173"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p174"}, {"Ptr": "p175"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p176"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": 
{"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p178"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b218"}, {"Ptr": "p179"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p180"}, {"Ptr": "p181"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p182"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + 
"block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b217"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p184"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b218"}, {"Ptr": "p185"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p186"}, {"Ptr": "p187"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_9_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p188"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 
16}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_10_kernel", [{"Ptr": "b196"}, {"Ptr": "b9"}, {"Ptr": "p190"}, {"Ptr": "p191"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_3_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p194"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b9"}, {"Ptr": "b196"}, {"Ptr": "p192"}, {"Ptr": "p193"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel", [{"Ptr": "b196"}, {"Ptr": "b9"}, {"Ptr": "p196"}, {"Ptr": "p197"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b217"}, {"Ptr": "b196"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p198"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b205"}, {"Ptr": "b218"}, {"Ptr": "p199"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b4"}, {"Ptr": "b205"}, {"Ptr": "p200"}, {"Ptr": "p201"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel", [{"Ptr": "b196"}, {"Ptr": "b4"}, {"Ptr": "p202"}, {"Ptr": "p203"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b217"}, {"Ptr": "b196"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b218"}, {"Ptr": "b217"}, {"Ptr": "p204"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b205"}, {"Ptr": "b218"}, {"Ptr": "p205"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b3"}, {"Ptr": "b205"}, {"Ptr": "p206"}, {"Ptr": "p207"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel", [{"Ptr": "b217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel_1", [{"Ptr": "b196"}, {"Ptr": "b217"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "output"}, {"Ptr": "b196"}, {"Ptr": "p208"}, {"Ptr": "p209"}], { + "grid_dim_x": {"Absolute": 1000}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_resnet152.json b/machine_interface/tests/data/cuda/test_gpu_resnet152.json new file mode 100644 index 00000000..da97929c --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_resnet152.json @@ -0,0 +1,2325 @@ +{ + "modules": [{"module_name": "resnet152.cubin", "path": "cuda/resnet152.cubin"}], + "kernels": [ + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2"}, + {"module_name": "resnet152.cubin", "kernel_name": 
"tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_1_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_2_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_3_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_10_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": 
"tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_4_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_6_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_7_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_9_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel_1"}, + {"module_name": "resnet152.cubin", "kernel_name": "tvmgen_default_fused_nn_max_pool2d_kernel"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", 
"p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178", "p179", "p180", "p181", "p182", "p183", "p184", "p185", "p186", "p187", "p188", "p189", "p190", "p191", "p192", "p193", "p194", "p195", "p196", "p197", "p198", "p199", "p200", "p201", "p202", "p203", "p204", "p205", "p206", "p207", "p208", "p209", "p210", "p211", "p212", "p213", "p214", "p215", "p216", "p217", "p218", "p219", "p220", "p221", "p222", "p223", "p224", "p225", "p226", "p227", "p228", "p229", "p230", "p231", "p232", "p233", "p234", "p235", "p236", "p237", "p238", "p239", "p240", "p241", "p242", "p243", "p244", "p245", "p246", "p247", "p248", "p249", "p250", "p251", "p252", "p253", "p254", "p255", "p256", "p257", "p258", "p259", "p260", "p261", "p262", "p263", "p264", "p265", "p266", "p267", "p268", "p269", "p270", "p271", "p272", "p273", "p274", "p275", "p276", "p277", "p278", "p279", "p280", "p281", "p282", "p283", "p284", "p285", "p286", "p287", "p288", "p289", "p290", "p291", "p292", "p293", "p294", "p295", "p296", "p297", "p298", "p299", "p300", "p301", "p302", "p303", "p304", "p305", "p306", "p307", "p308", "p309", "p310", "p311"], + "buffers": {"output": {"Absolute": 4000}, "b3": {"Absolute": 3211264}, "b4": {"Absolute": 3211264}, "b9": {"Absolute": 3211264}, 
"b298": {"Absolute": 100352}, "b307": {"Absolute": 100352}, "b319": {"Absolute": 1806336}, "b320": {"Absolute": 1806336}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel", [{"Ptr": "b3"}, {"Ptr": "input"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 112}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 16}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_max_pool2d_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p2"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + 
"block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p6"}, {"Ptr": "p7"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p10"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p13"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p14"}, {"Ptr": "p15"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p16"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p20"}, {"Ptr": "p21"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p22"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_4_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p24"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": 
{"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p28"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 4}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p26"}, {"Ptr": "p27"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p30"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": 
{"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p34"}, {"Ptr": "p35"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p36"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p38"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": 
{"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p40"}, {"Ptr": "p41"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p42"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p44"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": 
{"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p45"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p46"}, {"Ptr": "p47"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p48"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p50"}], { + "grid_dim_x": 
{"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p52"}, {"Ptr": "p53"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p54"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, 
{"Ptr": "p56"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p58"}, {"Ptr": "p59"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p60"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", 
[{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p62"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p64"}, {"Ptr": "p65"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p66"}, {"Ptr": "p67"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p68"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p70"}, {"Ptr": "p71"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_6_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p72"}, {"Ptr": "p73"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 2}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_7_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p74"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, 
+ {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p78"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p76"}, {"Ptr": "p77"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p80"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p83"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p84"}, {"Ptr": "p85"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p86"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p88"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p90"}, {"Ptr": "p91"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p92"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p94"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p95"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p96"}, {"Ptr": "p97"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p98"}, {"Ptr": "p99"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p102"}, {"Ptr": "p103"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p104"}, {"Ptr": "p105"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p106"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, 
+ "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p108"}, {"Ptr": "p109"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p110"}, {"Ptr": "p111"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p112"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 
16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p113"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p114"}, {"Ptr": "p115"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p116"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p118"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p119"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p120"}, {"Ptr": "p121"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p122"}, {"Ptr": "p123"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": 
"b319"}, {"Ptr": "p124"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p125"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p126"}, {"Ptr": "p127"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p128"}, {"Ptr": "p129"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p130"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p132"}, {"Ptr": "p133"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p134"}, {"Ptr": "p135"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p136"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p138"}, {"Ptr": "p139"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p140"}, {"Ptr": "p141"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p142"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p143"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p144"}, {"Ptr": "p145"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p146"}, {"Ptr": "p147"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p148"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p150"}, {"Ptr": "p151"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p152"}, {"Ptr": "p153"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, 
+ "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p154"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p155"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p156"}, {"Ptr": "p157"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p158"}, {"Ptr": "p159"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p160"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p161"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p162"}, {"Ptr": "p163"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p164"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": 
{"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p166"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p167"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p168"}, {"Ptr": "p169"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p170"}, {"Ptr": "p171"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": 
"b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p172"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p173"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p174"}, {"Ptr": "p175"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p176"}, {"Ptr": "p177"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p178"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p179"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p180"}, {"Ptr": "p181"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p182"}, {"Ptr": "p183"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p184"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p185"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p186"}, {"Ptr": "p187"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p188"}, {"Ptr": "p189"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": 
{"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p190"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p191"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p192"}, {"Ptr": "p193"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p194"}, {"Ptr": "p195"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + 
"block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p196"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p197"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p198"}, {"Ptr": "p199"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p200"}, {"Ptr": "p201"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, 
+ "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p202"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p203"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p204"}, {"Ptr": "p205"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p206"}, {"Ptr": "p207"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 
7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p208"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p209"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p210"}, {"Ptr": "p211"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p212"}, {"Ptr": "p213"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p214"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p215"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p216"}, {"Ptr": "p217"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": 
"p218"}, {"Ptr": "p219"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p220"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p221"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p222"}, {"Ptr": "p223"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", 
[{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p224"}, {"Ptr": "p225"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p226"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p227"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p228"}, {"Ptr": "p229"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p230"}, {"Ptr": "p231"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p232"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p233"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p234"}, {"Ptr": "p235"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p236"}, {"Ptr": "p237"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p238"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p239"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p240"}, {"Ptr": "p241"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": 
{"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p242"}, {"Ptr": "p243"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p244"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p245"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p246"}, {"Ptr": "p247"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + 
"block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p248"}, {"Ptr": "p249"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p250"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p251"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p252"}, {"Ptr": "p253"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 
16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p254"}, {"Ptr": "p255"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p256"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p257"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p258"}, {"Ptr": "p259"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p260"}, {"Ptr": "p261"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p262"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p263"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p264"}, {"Ptr": "p265"}, {"Ptr": "b9"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p266"}, {"Ptr": "p267"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p268"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p269"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p270"}, 
{"Ptr": "p271"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p272"}, {"Ptr": "p273"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p274"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b320"}, {"Ptr": "p275"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": 
"b9"}, {"Ptr": "b3"}, {"Ptr": "p276"}, {"Ptr": "p277"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p278"}, {"Ptr": "p279"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p280"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b320"}, {"Ptr": "p281"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p282"}, {"Ptr": "p283"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p284"}, {"Ptr": "p285"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b319"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p286"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b320"}, {"Ptr": "p287"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p288"}, {"Ptr": "p289"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_9_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p290"}, {"Ptr": "p291"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_10_kernel", [{"Ptr": "b298"}, {"Ptr": "b9"}, {"Ptr": "p292"}, {"Ptr": "p293"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_3_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p296"}, {"Ptr": "p297"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b9"}, {"Ptr": "b298"}, {"Ptr": "p294"}, {"Ptr": "p295"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel", [{"Ptr": "b298"}, {"Ptr": "b9"}, {"Ptr": "p298"}, {"Ptr": "p299"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b319"}, {"Ptr": "b298"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p300"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b307"}, {"Ptr": "b320"}, {"Ptr": "p301"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b4"}, {"Ptr": "b307"}, {"Ptr": "p302"}, {"Ptr": "p303"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 
16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel", [{"Ptr": "b298"}, {"Ptr": "b4"}, {"Ptr": "p304"}, {"Ptr": "p305"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b319"}, {"Ptr": "b298"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b320"}, {"Ptr": "b319"}, {"Ptr": "p306"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b307"}, {"Ptr": "b320"}, {"Ptr": "p307"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b3"}, {"Ptr": "b307"}, {"Ptr": "p308"}, {"Ptr": "p309"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": 
{"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel", [{"Ptr": "b319"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel_1", [{"Ptr": "b298"}, {"Ptr": "b319"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "output"}, {"Ptr": "b298"}, {"Ptr": "p310"}, {"Ptr": "p311"}], { + "grid_dim_x": {"Absolute": 1000}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_resnet18.json b/machine_interface/tests/data/cuda/test_gpu_resnet18.json new file mode 100644 index 00000000..685f101c --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_resnet18.json @@ -0,0 +1,497 @@ +{ + "modules": [{"module_name": "resnet18.cubin", "path": "cuda/resnet18.cubin"}], + "kernels": [ + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": 
"tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_2"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_2"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_2"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2"}, + {"module_name": "resnet18.cubin", "kernel_name": 
"tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_1_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_2_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": 
"tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel_1"}, + {"module_name": "resnet18.cubin", "kernel_name": "tvmgen_default_fused_nn_max_pool2d_kernel"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41"], + "buffers": {"output": {"Absolute": 4000}, "b3": {"Absolute": 3211264}, "b4": {"Absolute": 802816}, "b9": {"Absolute": 802816}, "b40": {"Absolute": 100352}, "b45": {"Absolute": 2048}, "b49": {"Absolute": 1806336}, "b50": {"Absolute": 1806336}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel", [{"Ptr": "b3"}, {"Ptr": "input"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 112}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 16}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_max_pool2d_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b50"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel", [{"Ptr": "b49"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_2", [{"Ptr": "b9"}, 
{"Ptr": "b50"}, {"Ptr": "p5"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b49"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b50"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p8"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b50"}, {"Ptr": "p9"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p10"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p14"}, {"Ptr": "p15"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b50"}, {"Ptr": "p13"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b49"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b50"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b50"}, {"Ptr": "p19"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p20"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p24"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 8}, + 
"block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p22"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b50"}, {"Ptr": "p23"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b49"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p26"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b50"}, {"Ptr": "p27"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b50"}, {"Ptr": "p29"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel", [{"Ptr": "b4"}, 
{"Ptr": "b3"}, {"Ptr": "p30"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p34"}, {"Ptr": "p35"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_2", [{"Ptr": "b40"}, {"Ptr": "b50"}, {"Ptr": "p33"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b49"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b50"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_1", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p38"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b50"}, {"Ptr": "p39"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel", [{"Ptr": "b49"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 16}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel_1", [{"Ptr": "b45"}, {"Ptr": "b49"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "output"}, {"Ptr": "b45"}, {"Ptr": "p40"}, {"Ptr": "p41"}], { + "grid_dim_x": {"Absolute": 1000}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_resnet34.json b/machine_interface/tests/data/cuda/test_gpu_resnet34.json new file mode 100644 index 00000000..4b95acad --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_resnet34.json @@ -0,0 +1,929 @@ +{ + "modules": [{"module_name": "resnet34.cubin", "path": "cuda/resnet34.cubin"}], + "kernels": [ + {"module_name": 
"resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_2"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_2"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_2"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel"}, + {"module_name": 
"resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_1_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_2_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": 
"tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel_1"}, + {"module_name": "resnet34.cubin", "kernel_name": "tvmgen_default_fused_nn_max_pool2d_kernel"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73"], + "buffers": {"output": {"Absolute": 4000}, "b3": {"Absolute": 3211264}, "b4": {"Absolute": 802816}, "b9": {"Absolute": 802816}, "b68": {"Absolute": 100352}, "b77": {"Absolute": 2048}, "b81": {"Absolute": 1806336}, "b82": {"Absolute": 1806336}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel", [{"Ptr": "b3"}, {"Ptr": "input"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 112}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 16}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 
0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_max_pool2d_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b82"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel", [{"Ptr": "b81"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p5"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b81"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p8"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b82"}, {"Ptr": "p9"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b81"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p10"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + 
"block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p13"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p14"}, {"Ptr": "p15"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p17"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b81"}, {"Ptr": 
"b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p20"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p22"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b82"}, {"Ptr": "p23"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b81"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p26"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p27"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b81"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p28"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p30"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_1_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b82"}, {"Ptr": "p31"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p32"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_1_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p36"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 14}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p34"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b82"}, {"Ptr": "p35"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, 
{"Ptr": "b81"}, {"Ptr": "p38"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p40"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p41"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p42"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b82"}, {"Ptr": "p45"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p46"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p47"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p48"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p49"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 98}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p52"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b82"}, {"Ptr": "p53"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": 
"b82"}, {"Ptr": "b81"}, {"Ptr": "p54"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p57"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p58"}, {"Ptr": "p59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_2_kernel", [{"Ptr": "b68"}, {"Ptr": "b9"}, {"Ptr": "p62"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p61"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b81"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p64"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b68"}, {"Ptr": "b82"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel", [{"Ptr": "b81"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p66"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 
16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b82"}, {"Ptr": "p67"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b81"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p68"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b68"}, {"Ptr": "b82"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel", [{"Ptr": "b81"}, {"Ptr": "b68"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_1", [{"Ptr": "b82"}, {"Ptr": "b81"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_add_nn_relu_3_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b82"}, {"Ptr": "p71"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel", [{"Ptr": "b81"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 16}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel_1", [{"Ptr": "b77"}, {"Ptr": "b81"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "output"}, {"Ptr": "b77"}, {"Ptr": "p72"}, {"Ptr": "p73"}], { + "grid_dim_x": {"Absolute": 1000}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_resnet50.json b/machine_interface/tests/data/cuda/test_gpu_resnet50.json new file mode 100644 index 00000000..0c0cfbe1 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_resnet50.json @@ -0,0 +1,795 @@ +{ + "modules": [{"module_name": "resnet50.cubin", "path": "cuda/resnet50.cubin"}], + "kernels": [ + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2"}, + {"module_name": "resnet50.cubin", "kernel_name": 
"tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_1_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_2_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_3_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_10_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_4_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel"}, + {"module_name": 
"resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_6_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_7_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_9_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_global_avg_pool2d_kernel_1"}, + {"module_name": "resnet50.cubin", "kernel_name": "tvmgen_default_fused_nn_max_pool2d_kernel"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107"], + "buffers": {"output": {"Absolute": 4000}, "b3": {"Absolute": 3211264}, "b4": {"Absolute": 3211264}, "b9": {"Absolute": 3211264}, "b94": {"Absolute": 100352}, "b103": {"Absolute": 100352}, "b115": {"Absolute": 1806336}, "b116": {"Absolute": 1806336}}, + "outputs": ["output"], + 
"control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_kernel", [{"Ptr": "b3"}, {"Ptr": "input"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 112}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 16}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_max_pool2d_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p2"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b115"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b116"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p6"}, {"Ptr": "p7"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p10"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b115"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b116"}, {"Ptr": "p13"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p14"}, {"Ptr": "p15"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p16"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 56}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel", [{"Ptr": "b115"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} 
+ }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 36}, + "block_dim_x": {"Absolute": 98}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b116"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p20"}, {"Ptr": "p21"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_3_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p22"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_4_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p24"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 2}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p28"}, {"Ptr": "p29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 28}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 4}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p26"}, {"Ptr": "p27"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p30"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b115"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b116"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p34"}, {"Ptr": "p35"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p36"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b115"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p38"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b116"}, {"Ptr": "p39"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p40"}, {"Ptr": "p41"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_5_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p42"}, {"Ptr": "p43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel", [{"Ptr": "b115"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p44"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": 
{"Absolute": 4}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_1_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b116"}, {"Ptr": "p45"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p46"}, {"Ptr": "p47"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_6_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p48"}, {"Ptr": "p49"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 2}, + "block_dim_y": {"Absolute": 4}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_7_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p50"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 4}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p54"}, {"Ptr": "p55"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p52"}, {"Ptr": "p53"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p56"}, {"Ptr": "p57"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b115"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p58"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b116"}, {"Ptr": "p59"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p60"}, {"Ptr": "p61"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p62"}, {"Ptr": "p63"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b115"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p64"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b116"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p66"}, {"Ptr": "p67"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p68"}, {"Ptr": "p69"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b115"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b4"}, {"Ptr": "b116"}, {"Ptr": "p71"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b4"}, {"Ptr": "p72"}, {"Ptr": "p73"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b4"}, {"Ptr": "b3"}, {"Ptr": "p74"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b115"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p76"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b9"}, {"Ptr": "b116"}, {"Ptr": "p77"}], { + "grid_dim_x": {"Absolute": 98}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p78"}, {"Ptr": "p79"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_8_kernel", [{"Ptr": "b9"}, {"Ptr": "b4"}, {"Ptr": "p80"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel", [{"Ptr": "b115"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 49}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_2_kernel_2", [{"Ptr": "b3"}, {"Ptr": "b116"}, {"Ptr": "p83"}], { + 
"grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_2_kernel", [{"Ptr": "b9"}, {"Ptr": "b3"}, {"Ptr": "p84"}, {"Ptr": "p85"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_9_kernel", [{"Ptr": "b3"}, {"Ptr": "b9"}, {"Ptr": "p86"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 2}, + "block_dim_z": {"Absolute": 8}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_10_kernel", [{"Ptr": "b94"}, {"Ptr": "b3"}, {"Ptr": "p88"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 8}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_3_kernel", [{"Ptr": "b4"}, {"Ptr": "b9"}, {"Ptr": "p92"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b3"}, {"Ptr": "b94"}, {"Ptr": "p90"}, {"Ptr": "p91"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, 
+ "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel", [{"Ptr": "b94"}, {"Ptr": "b3"}, {"Ptr": "p94"}, {"Ptr": "p95"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b115"}, {"Ptr": "b94"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p96"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b103"}, {"Ptr": "b116"}, {"Ptr": "p97"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b9"}, {"Ptr": "b103"}, {"Ptr": "p98"}, {"Ptr": "p99"}, {"Ptr": 
"b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_nn_relu_11_kernel", [{"Ptr": "b94"}, {"Ptr": "b9"}, {"Ptr": "p100"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 7}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 32}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel", [{"Ptr": "b115"}, {"Ptr": "b94"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_1", [{"Ptr": "b116"}, {"Ptr": "b115"}, {"Ptr": "p102"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 16}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 16}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_contrib_conv2d_winograd_without_weight_transform_add_nn_relu_3_kernel_2", [{"Ptr": "b103"}, {"Ptr": "b116"}, {"Ptr": "p103"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_add_nn_relu_3_kernel", [{"Ptr": "b4"}, {"Ptr": 
"b103"}, {"Ptr": "p104"}, {"Ptr": "p105"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 64}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 7}, + "block_dim_z": {"Absolute": 16}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel", [{"Ptr": "b115"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_global_avg_pool2d_kernel_1", [{"Ptr": "b94"}, {"Ptr": "b115"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "output"}, {"Ptr": "b94"}, {"Ptr": "p106"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 1000}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_rnn.json b/machine_interface/tests/data/cuda/test_gpu_rnn.json new file mode 100644 index 00000000..19e1734e --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_rnn.json @@ -0,0 +1,5144 @@ +{ + "modules": [{"module_name": "rnn.cubin", "path": "cuda/rnn.cubin"}], + "kernels": [ + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_1_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel"}, + 
{"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_add_tanh_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_tanh_1_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_tanh_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_10_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_11_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_12_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_13_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_14_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_15_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_16_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_17_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_18_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_19_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_1_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_20_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_21_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_22_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_23_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_24_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_25_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_26_kernel"}, + 
{"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_27_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_28_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_29_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_2_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_30_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_31_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_32_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_33_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_34_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_35_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_36_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_37_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_38_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_39_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_3_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_40_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_41_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_42_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_43_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_44_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_45_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_46_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": 
"tvmgen_default_fused_squeeze_47_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_48_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_49_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_4_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_50_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_51_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_52_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_53_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_54_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_55_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_5_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_6_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_7_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_8_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_9_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_squeeze_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_1"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_10"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_11"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_12"}, + {"module_name": "rnn.cubin", "kernel_name": 
"tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_13"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_14"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_15"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_16"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_17"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_18"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_19"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_2"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_20"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_21"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_22"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_23"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_24"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_25"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_26"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_27"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_3"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_4"}, + 
{"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_5"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_6"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_7"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_8"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_9"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_stack_expand_dims_squeeze_transpose_take_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_1"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_10"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_11"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_12"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_13"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_14"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_15"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_16"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_17"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_18"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_19"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_2"}, + {"module_name": "rnn.cubin", "kernel_name": 
"tvmgen_default_fused_transpose_split_kernel_20"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_21"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_22"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_23"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_24"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_25"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_26"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_27"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_3"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_4"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_5"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_6"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_7"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_8"}, + {"module_name": "rnn.cubin", "kernel_name": "tvmgen_default_fused_transpose_split_kernel_9"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26"], + "buffers": {"output": {"Absolute": 512}, "b1": {"Absolute": 512}, "b2": {"Absolute": 512}, "b3": {"Absolute": 512}, "b4": {"Absolute": 512}, "b5": {"Absolute": 512}, "b6": {"Absolute": 512}, "b7": {"Absolute": 512}, "b8": {"Absolute": 512}, "b9": {"Absolute": 512}, "b10": {"Absolute": 512}, "b11": {"Absolute": 512}, "b12": {"Absolute": 512}, "b13": {"Absolute": 512}, 
"b14": {"Absolute": 512}, "b15": {"Absolute": 512}, "b16": {"Absolute": 512}, "b17": {"Absolute": 512}, "b18": {"Absolute": 512}, "b19": {"Absolute": 512}, "b20": {"Absolute": 512}, "b21": {"Absolute": 512}, "b22": {"Absolute": 512}, "b23": {"Absolute": 512}, "b25": {"Absolute": 512}, "b26": {"Absolute": 512}, "b27": {"Absolute": 512}, "b28": {"Absolute": 512}, "b29": {"Absolute": 512}, "b32": {"Absolute": 512}, "b36": {"Absolute": 512}, "b37": {"Absolute": 512}, "b38": {"Absolute": 512}, "b39": {"Absolute": 512}, "b40": {"Absolute": 512}, "b41": {"Absolute": 512}, "b42": {"Absolute": 512}, "b43": {"Absolute": 512}, "b44": {"Absolute": 512}, "b45": {"Absolute": 512}, "b46": {"Absolute": 512}, "b47": {"Absolute": 512}, "b48": {"Absolute": 512}, "b49": {"Absolute": 512}, "b50": {"Absolute": 512}, "b51": {"Absolute": 512}, "b52": {"Absolute": 512}, "b53": {"Absolute": 512}, "b54": {"Absolute": 512}, "b55": {"Absolute": 512}, "b56": {"Absolute": 512}, "b57": {"Absolute": 512}, "b58": {"Absolute": 512}, "b59": {"Absolute": 512}, "b60": {"Absolute": 512}, "b61": {"Absolute": 512}, "b62": {"Absolute": 512}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel", [{"Ptr": "b1"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_1", [{"Ptr": "b2"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_2", [{"Ptr": "b3"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 
1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_3", [{"Ptr": "b4"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_4", [{"Ptr": "b5"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_5", [{"Ptr": "b6"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_6", [{"Ptr": "b7"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_7", [{"Ptr": "b8"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_transpose_split_kernel_8", [{"Ptr": "b9"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_9", [{"Ptr": "b10"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_10", [{"Ptr": "b11"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_11", [{"Ptr": "b12"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_12", [{"Ptr": "b13"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_13", [{"Ptr": "b14"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_14", [{"Ptr": "b15"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_15", [{"Ptr": "b16"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_16", [{"Ptr": "b17"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_17", [{"Ptr": "b18"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_18", [{"Ptr": "b19"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_19", [{"Ptr": "b20"}, {"Ptr": "input"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_20", [{"Ptr": "b21"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_21", [{"Ptr": "b22"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_22", [{"Ptr": "b23"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_23", [{"Ptr": "output"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_24", [{"Ptr": "b25"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, 
+ {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_25", [{"Ptr": "b26"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_26", [{"Ptr": "b27"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_transpose_split_kernel_27", [{"Ptr": "b28"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_kernel", [{"Ptr": "b29"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_tanh_kernel", [{"Ptr": "b32"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b32"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b37"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_2_kernel", [{"Ptr": "b29"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b37"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b38"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_3_kernel", [{"Ptr": "b29"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b38"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b39"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_4_kernel", [{"Ptr": "b29"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b39"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b40"}, {"Ptr": "b29"}, {"Ptr": "p0"}, 
{"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_5_kernel", [{"Ptr": "b29"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b40"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b41"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_6_kernel", [{"Ptr": "b29"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b41"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b42"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_7_kernel", [{"Ptr": "b29"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b42"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b43"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_8_kernel", [{"Ptr": "b29"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b43"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b44"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_9_kernel", [{"Ptr": "b29"}, {"Ptr": "b10"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b44"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b45"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_10_kernel", [{"Ptr": "b29"}, {"Ptr": "b11"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b45"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b46"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_11_kernel", [{"Ptr": "b29"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b46"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b47"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_12_kernel", [{"Ptr": "b29"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b47"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b48"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_13_kernel", [{"Ptr": "b29"}, {"Ptr": "b14"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b48"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b49"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_14_kernel", [{"Ptr": "b29"}, {"Ptr": "b15"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b49"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b50"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_15_kernel", [{"Ptr": "b29"}, {"Ptr": "b16"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b50"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + 
"grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b51"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_16_kernel", [{"Ptr": "b29"}, {"Ptr": "b17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b51"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b52"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_17_kernel", [{"Ptr": "b29"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b52"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b53"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_18_kernel", [{"Ptr": "b29"}, {"Ptr": "b19"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b53"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b54"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_19_kernel", [{"Ptr": "b29"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b54"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b55"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_20_kernel", [{"Ptr": "b29"}, {"Ptr": "b21"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b55"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b56"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, 
{"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_21_kernel", [{"Ptr": "b29"}, {"Ptr": "b22"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b56"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b57"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_22_kernel", [{"Ptr": "b29"}, {"Ptr": "b23"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b57"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b58"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_23_kernel", [{"Ptr": "b29"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b58"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b59"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_24_kernel", [{"Ptr": "b29"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b59"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b60"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_25_kernel", [{"Ptr": "b29"}, {"Ptr": "b26"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b60"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b61"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_26_kernel", [{"Ptr": "b29"}, {"Ptr": "b27"}], { + "grid_dim_x": 
{"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b61"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b62"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_27_kernel", [{"Ptr": "b29"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 28}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b36"}, {"Ptr": "b62"}, {"Ptr": "p3"}, {"Ptr": "p4"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_kernel", [{"Ptr": "b28"}, {"Ptr": "b29"}, {"Ptr": "p0"}, {"Ptr": "p2"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel", [{"Ptr": "b36"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_1", [{"Ptr": "b29"}, {"Ptr": "b37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_2", [{"Ptr": "b27"}, {"Ptr": "b38"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_3", [{"Ptr": "b26"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_4", [{"Ptr": "b25"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_5", [{"Ptr": "output"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_6", [{"Ptr": "b23"}, {"Ptr": "b42"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_7", [{"Ptr": "b22"}, {"Ptr": "b43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_8", [{"Ptr": "b21"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_9", [{"Ptr": "b20"}, {"Ptr": "b45"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_10", [{"Ptr": "b19"}, {"Ptr": "b46"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_11", [{"Ptr": "b18"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_12", [{"Ptr": "b17"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_13", [{"Ptr": "b16"}, {"Ptr": "b49"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_14", [{"Ptr": "b15"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_15", [{"Ptr": "b14"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_16", [{"Ptr": "b13"}, {"Ptr": "b52"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_17", [{"Ptr": "b12"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_18", [{"Ptr": "b11"}, {"Ptr": "b54"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_19", [{"Ptr": "b10"}, {"Ptr": "b55"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_20", [{"Ptr": "b9"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_21", [{"Ptr": "b8"}, {"Ptr": "b57"}], 
{ + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_22", [{"Ptr": "b7"}, {"Ptr": "b58"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_23", [{"Ptr": "b6"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_24", [{"Ptr": "b5"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_25", [{"Ptr": "b4"}, {"Ptr": "b61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_26", [{"Ptr": "b3"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_27", [{"Ptr": "b2"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_28_kernel", [{"Ptr": "b32"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_tanh_1_kernel", [{"Ptr": "b37"}, {"Ptr": "b32"}, {"Ptr": "p5"}, {"Ptr": "p6"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_29_kernel", [{"Ptr": "b38"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b37"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": 
"b40"}, {"Ptr": "b38"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_30_kernel", [{"Ptr": "b41"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b42"}, {"Ptr": "b40"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b43"}, {"Ptr": "b41"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b42"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_31_kernel", [{"Ptr": "b44"}, {"Ptr": "b26"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b45"}, {"Ptr": "b43"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b46"}, {"Ptr": "b44"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b45"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_32_kernel", [{"Ptr": "b47"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b48"}, {"Ptr": "b46"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b49"}, {"Ptr": "b47"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_33_kernel", [{"Ptr": "b50"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b51"}, {"Ptr": "b49"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b52"}, {"Ptr": "b50"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_34_kernel", [{"Ptr": "b53"}, {"Ptr": "b23"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b54"}, {"Ptr": "b52"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b55"}, {"Ptr": "b53"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b54"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_squeeze_35_kernel", [{"Ptr": "b56"}, {"Ptr": "b22"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b55"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b58"}, {"Ptr": "b56"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_36_kernel", [{"Ptr": "b59"}, {"Ptr": "b21"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b60"}, {"Ptr": "b58"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b61"}, {"Ptr": "b59"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b60"}], { + "grid_dim_x": 
{"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_37_kernel", [{"Ptr": "b62"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b28"}, {"Ptr": "b61"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b32"}, {"Ptr": "b62"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_38_kernel", [{"Ptr": "b38"}, {"Ptr": "b19"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b32"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, 
+ "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b41"}, {"Ptr": "b38"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_39_kernel", [{"Ptr": "b42"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b41"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b45"}, {"Ptr": "b42"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_40_kernel", [{"Ptr": "b47"}, {"Ptr": "b17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b48"}, {"Ptr": "b45"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b50"}, {"Ptr": "b47"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_41_kernel", [{"Ptr": "b51"}, {"Ptr": "b16"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b53"}, {"Ptr": "b50"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b54"}, {"Ptr": "b51"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_42_kernel", [{"Ptr": "b56"}, {"Ptr": "b15"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b54"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b59"}, {"Ptr": "b56"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_43_kernel", [{"Ptr": "b60"}, {"Ptr": "b14"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b59"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b28"}, {"Ptr": "b60"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 
1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_44_kernel", [{"Ptr": "b38"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b28"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b42"}, {"Ptr": "b38"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_45_kernel", [{"Ptr": "b44"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b47"}, {"Ptr": "b42"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b48"}, {"Ptr": "b44"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_46_kernel", [{"Ptr": "b51"}, {"Ptr": "b11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b53"}, {"Ptr": "b48"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b56"}, {"Ptr": "b51"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_47_kernel", [{"Ptr": "b57"}, {"Ptr": "b10"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b60"}, {"Ptr": "b56"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { 
+ "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b62"}, {"Ptr": "b57"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_48_kernel", [{"Ptr": "b38"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b62"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b38"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_49_kernel", [{"Ptr": "b47"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b51"}, {"Ptr": "b44"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b47"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_50_kernel", [{"Ptr": "b57"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b60"}, {"Ptr": "b53"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b38"}, {"Ptr": "b57"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_51_kernel", [{"Ptr": "b39"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b47"}, {"Ptr": "b38"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b51"}, {"Ptr": "b39"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_52_kernel", [{"Ptr": "b57"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b60"}, {"Ptr": "b51"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b57"}, 
{"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_53_kernel", [{"Ptr": "b47"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b39"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b60"}, {"Ptr": "b47"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_54_kernel", [{"Ptr": "b47"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b60"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b1"}, {"Ptr": "b47"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_55_kernel", [{"Ptr": "b47"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b1"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b36"}, {"Ptr": "b47"}, {"Ptr": "p5"}, {"Ptr": "p7"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel", [{"Ptr": "b29"}, {"Ptr": "b37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_1", [{"Ptr": "b27"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_2", [{"Ptr": "b26"}, {"Ptr": "b43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_3", [{"Ptr": "b25"}, {"Ptr": "b46"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_4", [{"Ptr": "output"}, {"Ptr": "b49"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_5", [{"Ptr": "b23"}, {"Ptr": "b52"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_6", [{"Ptr": "b22"}, {"Ptr": "b55"}], { 
+ "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_7", [{"Ptr": "b21"}, {"Ptr": "b58"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_8", [{"Ptr": "b20"}, {"Ptr": "b61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_9", [{"Ptr": "b19"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_10", [{"Ptr": "b18"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_11", [{"Ptr": "b17"}, {"Ptr": "b45"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_12", [{"Ptr": "b16"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_13", [{"Ptr": "b15"}, {"Ptr": "b54"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_14", [{"Ptr": "b14"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_15", [{"Ptr": "b13"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_16", [{"Ptr": "b12"}, {"Ptr": "b42"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_17", [{"Ptr": "b11"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_18", [{"Ptr": "b10"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_19", [{"Ptr": "b9"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_20", [{"Ptr": "b8"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_21", [{"Ptr": "b7"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_22", [{"Ptr": "b6"}, {"Ptr": "b38"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_23", [{"Ptr": "b5"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_24", [{"Ptr": "b4"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_25", [{"Ptr": "b3"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_26", [{"Ptr": "b2"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_27", [{"Ptr": "b47"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_28_kernel", [{"Ptr": "b57"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_tanh_1_kernel", [{"Ptr": "b37"}, {"Ptr": "b57"}, {"Ptr": "p10"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_29_kernel", [{"Ptr": "b40"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b43"}, {"Ptr": "b37"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b46"}, {"Ptr": "b40"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b43"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_30_kernel", [{"Ptr": "b49"}, {"Ptr": "b26"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b52"}, {"Ptr": "b46"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b55"}, {"Ptr": "b49"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b52"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_31_kernel", [{"Ptr": "b58"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b61"}, {"Ptr": "b55"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b32"}, {"Ptr": "b58"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b61"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_32_kernel", [{"Ptr": "b41"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b45"}, {"Ptr": "b32"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b50"}, {"Ptr": "b41"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b45"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_33_kernel", [{"Ptr": "b54"}, {"Ptr": "b23"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b50"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b28"}, {"Ptr": "b54"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_34_kernel", [{"Ptr": "b42"}, {"Ptr": "b22"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b48"}, {"Ptr": "b28"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b56"}, {"Ptr": "b42"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_35_kernel", [{"Ptr": "b62"}, {"Ptr": "b21"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b56"}, 
{"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b62"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_36_kernel", [{"Ptr": "b38"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b51"}, {"Ptr": "b53"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b38"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_37_kernel", [{"Ptr": "b60"}, {"Ptr": "b19"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b39"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b36"}, {"Ptr": "b60"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_38_kernel", [{"Ptr": "b57"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b40"}, {"Ptr": "b36"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b43"}, {"Ptr": "b57"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, 
+ "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_39_kernel", [{"Ptr": "b49"}, {"Ptr": "b17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b52"}, {"Ptr": "b43"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b58"}, {"Ptr": "b49"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b52"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_40_kernel", [{"Ptr": "b61"}, {"Ptr": "b16"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b41"}, {"Ptr": "b58"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b45"}, {"Ptr": "b61"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_41_kernel", [{"Ptr": "b54"}, {"Ptr": "b15"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b59"}, {"Ptr": "b45"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b42"}, {"Ptr": "b54"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_42_kernel", [{"Ptr": "b48"}, {"Ptr": "b14"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b42"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + 
"grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b48"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_43_kernel", [{"Ptr": "b38"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b51"}, {"Ptr": "b44"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b60"}, {"Ptr": "b38"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_44_kernel", [{"Ptr": "b1"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b60"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b1"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_45_kernel", [{"Ptr": "b49"}, {"Ptr": "b11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b52"}, {"Ptr": "b40"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b61"}, {"Ptr": "b49"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b52"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, 
+ "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_46_kernel", [{"Ptr": "b41"}, {"Ptr": "b10"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b54"}, {"Ptr": "b61"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b59"}, {"Ptr": "b41"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b54"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_47_kernel", [{"Ptr": "b48"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b59"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b38"}, {"Ptr": 
"b48"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_48_kernel", [{"Ptr": "b51"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b38"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b57"}, {"Ptr": "b51"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_49_kernel", [{"Ptr": "b49"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b52"}, {"Ptr": "b57"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b41"}, {"Ptr": "b49"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b52"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_50_kernel", [{"Ptr": "b54"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b48"}, {"Ptr": "b41"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b62"}, {"Ptr": "b54"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_51_kernel", [{"Ptr": "b51"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b62"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b49"}, {"Ptr": "b51"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_52_kernel", [{"Ptr": "b52"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b54"}, {"Ptr": "b49"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b48"}, {"Ptr": "b52"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b54"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_squeeze_53_kernel", [{"Ptr": "b51"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b48"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b52"}, {"Ptr": "b51"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_54_kernel", [{"Ptr": "b54"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b51"}, {"Ptr": "b52"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b1"}, {"Ptr": "b54"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b51"}], { + 
"grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_55_kernel", [{"Ptr": "b54"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b51"}, {"Ptr": "b1"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b54"}, {"Ptr": "p10"}, {"Ptr": "p12"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel", [{"Ptr": "b27"}, {"Ptr": "b37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_1", [{"Ptr": "b26"}, {"Ptr": "b46"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_2", [{"Ptr": "b25"}, {"Ptr": "b55"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_3", [{"Ptr": "output"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_4", [{"Ptr": "b23"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_5", [{"Ptr": "b22"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_6", [{"Ptr": "b21"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_7", [{"Ptr": "b20"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_8", [{"Ptr": "b19"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_9", [{"Ptr": "b18"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_10", [{"Ptr": "b17"}, {"Ptr": "b43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_11", [{"Ptr": "b16"}, {"Ptr": "b58"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_12", [{"Ptr": "b15"}, {"Ptr": "b45"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_13", [{"Ptr": "b14"}, {"Ptr": "b42"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_14", [{"Ptr": "b13"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_15", [{"Ptr": "b12"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_16", [{"Ptr": "b11"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_17", [{"Ptr": "b10"}, {"Ptr": "b61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_18", [{"Ptr": "b9"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_19", [{"Ptr": "b8"}, {"Ptr": "b38"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_20", [{"Ptr": "b7"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_21", [{"Ptr": "b6"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_22", [{"Ptr": "b5"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_23", [{"Ptr": "b4"}, {"Ptr": "b49"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_24", [{"Ptr": "b3"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_25", [{"Ptr": "b2"}, {"Ptr": "b52"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_26", [{"Ptr": "b47"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_27", [{"Ptr": "b54"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_28_kernel", [{"Ptr": "b51"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_tanh_1_kernel", [{"Ptr": "b37"}, {"Ptr": "b51"}, {"Ptr": "p15"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_29_kernel", [{"Ptr": "b46"}, {"Ptr": "b26"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b55"}, {"Ptr": "b37"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b32"}, {"Ptr": "b46"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b55"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_30_kernel", [{"Ptr": "b50"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b28"}, 
{"Ptr": "b32"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b56"}, {"Ptr": "b50"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_31_kernel", [{"Ptr": "b53"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b56"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b36"}, {"Ptr": "b53"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_32_kernel", [{"Ptr": "b43"}, {"Ptr": "b23"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b58"}, {"Ptr": "b36"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b45"}, {"Ptr": "b43"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b58"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_33_kernel", [{"Ptr": "b42"}, {"Ptr": "b22"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b45"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b60"}, {"Ptr": "b42"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_34_kernel", [{"Ptr": "b40"}, {"Ptr": "b21"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b61"}, {"Ptr": "b60"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b59"}, {"Ptr": "b40"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b61"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_35_kernel", [{"Ptr": "b38"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b59"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b41"}, {"Ptr": "b38"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_36_kernel", [{"Ptr": "b62"}, {"Ptr": "b19"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b49"}, {"Ptr": "b41"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b48"}, {"Ptr": "b62"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b49"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_37_kernel", [{"Ptr": "b52"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b48"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + 
"grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b52"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_38_kernel", [{"Ptr": "b51"}, {"Ptr": "b17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b46"}, {"Ptr": "b29"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b55"}, {"Ptr": "b51"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b46"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_39_kernel", [{"Ptr": "b50"}, {"Ptr": "b16"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b28"}, {"Ptr": "b55"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b50"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_40_kernel", [{"Ptr": "b39"}, {"Ptr": "b15"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b43"}, {"Ptr": "b53"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b58"}, {"Ptr": "b39"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b43"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_41_kernel", [{"Ptr": "b42"}, {"Ptr": "b14"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b44"}, {"Ptr": "b58"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b40"}, {"Ptr": "b42"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_42_kernel", [{"Ptr": "b61"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b38"}, {"Ptr": "b40"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b57"}, {"Ptr": 
"b61"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b38"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_43_kernel", [{"Ptr": "b62"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b49"}, {"Ptr": "b57"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b52"}, {"Ptr": "b62"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b49"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_44_kernel", [{"Ptr": "b1"}, {"Ptr": "b11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b51"}, {"Ptr": "b52"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b46"}, {"Ptr": "b1"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_45_kernel", [{"Ptr": "b50"}, {"Ptr": "b10"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b28"}, {"Ptr": "b46"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b50"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_46_kernel", [{"Ptr": "b43"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b42"}, {"Ptr": "b39"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b43"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b42"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_47_kernel", [{"Ptr": "b61"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b38"}, {"Ptr": "b44"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b62"}, {"Ptr": "b61"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b38"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_squeeze_48_kernel", [{"Ptr": "b49"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b62"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b51"}, {"Ptr": "b49"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_49_kernel", [{"Ptr": "b50"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b28"}, {"Ptr": "b51"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b43"}, {"Ptr": "b50"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b28"}], { + 
"grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_50_kernel", [{"Ptr": "b42"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b61"}, {"Ptr": "b43"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b38"}, {"Ptr": "b42"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b61"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_51_kernel", [{"Ptr": "b49"}, {"Ptr": "b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b38"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b50"}, {"Ptr": "b49"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_52_kernel", [{"Ptr": "b28"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b42"}, {"Ptr": "b50"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b61"}, {"Ptr": "b28"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b42"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_53_kernel", [{"Ptr": "b49"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b61"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b28"}, {"Ptr": "b49"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_54_kernel", [{"Ptr": "b42"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b49"}, {"Ptr": "b28"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b1"}, {"Ptr": "b42"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b49"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_55_kernel", [{"Ptr": "b42"}, {"Ptr": "b54"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b49"}, {"Ptr": "b1"}, {"Ptr": "p18"}, {"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b27"}, {"Ptr": "b42"}, {"Ptr": "p15"}, {"Ptr": "p17"}, {"Ptr": "b49"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel", [{"Ptr": "b26"}, {"Ptr": "b37"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_1", [{"Ptr": "b25"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_2", [{"Ptr": "output"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_3", [{"Ptr": "b23"}, {"Ptr": "b36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_4", [{"Ptr": "b22"}, {"Ptr": "b45"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_5", [{"Ptr": "b21"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_6", [{"Ptr": "b20"}, {"Ptr": "b59"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_7", [{"Ptr": "b19"}, {"Ptr": "b41"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_8", [{"Ptr": "b18"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_9", [{"Ptr": "b17"}, {"Ptr": "b29"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_10", [{"Ptr": "b16"}, {"Ptr": "b55"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_11", [{"Ptr": "b15"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_12", [{"Ptr": "b14"}, {"Ptr": "b58"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_13", [{"Ptr": "b13"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_14", [{"Ptr": "b12"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_15", [{"Ptr": "b11"}, {"Ptr": "b52"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_16", [{"Ptr": "b10"}, {"Ptr": "b46"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_17", [{"Ptr": "b9"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_18", [{"Ptr": "b8"}, {"Ptr": "b44"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_19", [{"Ptr": "b7"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_20", [{"Ptr": "b6"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_21", [{"Ptr": "b5"}, {"Ptr": "b43"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_22", [{"Ptr": "b4"}, {"Ptr": "b38"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_23", [{"Ptr": "b3"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_24", [{"Ptr": "b2"}, {"Ptr": "b61"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_25", [{"Ptr": "b47"}, {"Ptr": "b28"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_26", [{"Ptr": "b54"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_split_kernel_27", [{"Ptr": "b42"}, {"Ptr": "b27"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_28_kernel", [{"Ptr": "b49"}, {"Ptr": "b26"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_tanh_1_kernel", [{"Ptr": "b37"}, {"Ptr": "b49"}, {"Ptr": "p20"}, {"Ptr": "p21"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, 
+ "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_29_kernel", [{"Ptr": "b32"}, {"Ptr": "b25"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b56"}, {"Ptr": "b37"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b36"}, {"Ptr": "b32"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b56"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_30_kernel", [{"Ptr": "b45"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b60"}, {"Ptr": "b36"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b59"}, {"Ptr": "b45"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_31_kernel", [{"Ptr": "b41"}, {"Ptr": "b23"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b48"}, {"Ptr": "b59"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b29"}, {"Ptr": "b41"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b48"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_32_kernel", [{"Ptr": "b55"}, {"Ptr": "b22"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b53"}, {"Ptr": "b29"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + 
"grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b58"}, {"Ptr": "b55"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b53"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_33_kernel", [{"Ptr": "b40"}, {"Ptr": "b21"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b58"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b52"}, {"Ptr": "b40"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_34_kernel", [{"Ptr": "b46"}, {"Ptr": "b20"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b52"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b44"}, {"Ptr": "b46"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_35_kernel", [{"Ptr": "b62"}, {"Ptr": "b19"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b51"}, {"Ptr": "b44"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b43"}, {"Ptr": "b62"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b51"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_36_kernel", [{"Ptr": "b38"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b43"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b61"}, {"Ptr": "b38"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_37_kernel", [{"Ptr": "b28"}, {"Ptr": "b17"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b61"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b27"}, {"Ptr": 
"b28"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_38_kernel", [{"Ptr": "b49"}, {"Ptr": "b16"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b32"}, {"Ptr": "b27"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b56"}, {"Ptr": "b49"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b32"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_39_kernel", [{"Ptr": "b45"}, {"Ptr": "b15"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b60"}, {"Ptr": "b56"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b41"}, {"Ptr": "b45"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_40_kernel", [{"Ptr": "b48"}, {"Ptr": "b14"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b55"}, {"Ptr": "b41"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b53"}, {"Ptr": "b48"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b55"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_41_kernel", [{"Ptr": "b40"}, {"Ptr": "b13"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, 
+ "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b57"}, {"Ptr": "b53"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b46"}, {"Ptr": "b40"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b57"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_42_kernel", [{"Ptr": "b39"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b46"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b51"}, {"Ptr": "b39"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_squeeze_43_kernel", [{"Ptr": "b38"}, {"Ptr": "b11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b51"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b28"}, {"Ptr": "b38"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_44_kernel", [{"Ptr": "b1"}, {"Ptr": "b10"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b49"}, {"Ptr": "b28"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b32"}, {"Ptr": "b1"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b49"}], { + 
"grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_45_kernel", [{"Ptr": "b45"}, {"Ptr": "b9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b60"}, {"Ptr": "b32"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b48"}, {"Ptr": "b45"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_46_kernel", [{"Ptr": "b55"}, {"Ptr": "b8"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b40"}, {"Ptr": "b48"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b57"}, {"Ptr": "b55"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_47_kernel", [{"Ptr": "b39"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b62"}, {"Ptr": "b57"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b38"}, {"Ptr": "b39"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b62"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_48_kernel", [{"Ptr": "b50"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b38"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b49"}, {"Ptr": "b50"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_49_kernel", [{"Ptr": "b45"}, {"Ptr": "b5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b60"}, {"Ptr": "b49"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b55"}, {"Ptr": "b45"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b60"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_50_kernel", [{"Ptr": "b40"}, {"Ptr": 
"b4"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b39"}, {"Ptr": "b55"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b62"}, {"Ptr": "b40"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b39"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_51_kernel", [{"Ptr": "b50"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b62"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b45"}, {"Ptr": "b50"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_52_kernel", [{"Ptr": "b60"}, {"Ptr": "b2"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b40"}, {"Ptr": "b45"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b39"}, {"Ptr": "b60"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b40"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_53_kernel", [{"Ptr": "b50"}, {"Ptr": "b47"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b39"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b60"}, {"Ptr": "b50"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_54_kernel", [{"Ptr": "b40"}, {"Ptr": "b54"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b50"}, {"Ptr": "b60"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b1"}, {"Ptr": "b40"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_squeeze_55_kernel", [{"Ptr": "b40"}, {"Ptr": "b42"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b50"}, 
{"Ptr": "b1"}, {"Ptr": "p23"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_add_tanh_1_kernel", [{"Ptr": "b26"}, {"Ptr": "b40"}, {"Ptr": "p20"}, {"Ptr": "p22"}, {"Ptr": "b50"}], { + "grid_dim_x": {"Absolute": 128}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_stack_expand_dims_squeeze_transpose_take_kernel", [{"Ptr": "b25"}, {"Ptr": "b26"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "output"}, {"Ptr": "b25"}, {"Ptr": "p25"}, {"Ptr": "p26"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_simple.json b/machine_interface/tests/data/cuda/test_gpu_simple.json new file mode 100644 index 00000000..fdeaee28 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_simple.json @@ -0,0 +1,22 @@ +{ + "modules": [{"module_name": "simple.cubin", "path": "cuda/simple.cubin"}], + "kernels": [ + {"module_name": "simple.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"} + ], + "blueprint": { + "inputs": ["test", "p0"], + 
"buffers": {"output": {"Absolute": 8}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "output"}, {"Ptr": "test"}, {"Ptr": "p0"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_test.json b/machine_interface/tests/data/cuda/test_gpu_test.json new file mode 100644 index 00000000..042be4be --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_test.json @@ -0,0 +1,256 @@ +{ + "modules": [{"module_name": "test.cubin", "path": "cuda/test.cubin"}], + "kernels": [ + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_argmax_kernel"}, + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_equal_logical_not_where_kernel"}, + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_scatter_nd_kernel"}, + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_scatter_nd_kernel_1"}, + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__1_kernel"}, + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__2_kernel"}, + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__3_kernel"}, + {"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__4_kernel"}, + 
{"module_name": "test.cubin", "kernel_name": "tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__kernel"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6"], + "buffers": {"output": {"Absolute": 28}, "b1": {"Absolute": 28}, "b6": {"Absolute": 28}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_equal_logical_not_where_kernel", [{"Ptr": "b1"}, {"Ptr": "input"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 7}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "output"}, {"Ptr": "b1"}, {"Ptr": "p1"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_argmax_kernel", [{"Ptr": "output"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__kernel", [{"Ptr": "output"}, {"Ptr": "input"}, {"Ptr": "b1"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": 
"b1"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p0"}, {"Ptr": "output"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b6"}, {"Ptr": "p1"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_argmax_kernel", [{"Ptr": "b1"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__1_kernel", [{"Ptr": "b1"}, {"Ptr": "input"}, {"Ptr": "b6"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b6"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p3"}, {"Ptr": "b1"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b6"}, {"Ptr": "output"}, {"Ptr": "p1"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_argmax_kernel", [{"Ptr": "b6"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__2_kernel", [{"Ptr": "b6"}, {"Ptr": "input"}, {"Ptr": "output"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "output"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, 
+ "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p4"}, {"Ptr": "b6"}, {"Ptr": "b1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "output"}, {"Ptr": "b1"}, {"Ptr": "p1"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_argmax_kernel", [{"Ptr": "output"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__3_kernel", [{"Ptr": "output"}, {"Ptr": "input"}, {"Ptr": "b1"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b1"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p5"}, 
{"Ptr": "output"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b1"}, {"Ptr": "b6"}, {"Ptr": "p1"}, {"Ptr": "p2"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_argmax_kernel", [{"Ptr": "b1"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_equal_logical_not_take_cast_where_reshape_broadcast_to_broadcast_to_r_b7dc2b146cd96eaf__4_kernel", [{"Ptr": "b1"}, {"Ptr": "input"}, {"Ptr": "b6"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel", [{"Ptr": "b6"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_scatter_nd_kernel_1", [{"Ptr": "p6"}, {"Ptr": "b1"}, {"Ptr": "output"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/cuda/test_gpu_vit_b_16.json b/machine_interface/tests/data/cuda/test_gpu_vit_b_16.json new file mode 100644 index 00000000..6e9a5193 --- /dev/null +++ b/machine_interface/tests/data/cuda/test_gpu_vit_b_16.json @@ -0,0 +1,3136 @@ +{ + "modules": [{"module_name": "vit_b_16.cubin", "path": "cuda/vit_b_16.cubin"}], + "kernels": [ + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_mean_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_mean_kernel_1"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_1_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_batch_matmul_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_conv2d_add_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_1_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_2_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_1_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_add_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_dense_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_1"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_2"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_nn_softmax_kernel_3"}, + {"module_name": "vit_b_16.cubin", "kernel_name": 
"tvmgen_default_fused_reshape_add_add_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_add_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_concatenate_add_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_reshape_transpose_reshape_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_take_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_variance_kernel"}, + {"module_name": "vit_b_16.cubin", "kernel_name": "tvmgen_default_fused_variance_kernel_1"} + ], + "blueprint": { + "inputs": ["input", "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", "p40", "p41", "p42", "p43", "p44", "p45", 
"p46", "p47", "p48", "p49", "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", "p60", "p61", "p62", "p63", "p64", "p65", "p66", "p67", "p68", "p69", "p70", "p71", "p72", "p73", "p74", "p75", "p76", "p77", "p78", "p79", "p80", "p81", "p82", "p83", "p84", "p85", "p86", "p87", "p88", "p89", "p90", "p91", "p92", "p93", "p94", "p95", "p96", "p97", "p98", "p99", "p100", "p101", "p102", "p103", "p104", "p105", "p106", "p107", "p108", "p109", "p110", "p111", "p112", "p113", "p114", "p115", "p116", "p117", "p118", "p119", "p120", "p121", "p122", "p123", "p124", "p125", "p126", "p127", "p128", "p129", "p130", "p131", "p132", "p133", "p134", "p135", "p136", "p137", "p138", "p139", "p140", "p141", "p142", "p143", "p144", "p145", "p146", "p147", "p148", "p149", "p150", "p151", "p152", "p153", "p154", "p155", "p156", "p157", "p158", "p159", "p160", "p161", "p162", "p163", "p164", "p165", "p166", "p167", "p168", "p169", "p170", "p171", "p172", "p173", "p174", "p175", "p176", "p177", "p178"], + "buffers": {"output": {"Absolute": 4000}, "b3": {"Absolute": 1862832}, "b6": {"Absolute": 1862832}, "b7": {"Absolute": 788}, "b12": {"Absolute": 2420736}, "b18": {"Absolute": 605184}, "b19": {"Absolute": 2420736}, "b185": {"Absolute": 3072}, "b188": {"Absolute": 12288}, "b189": {"Absolute": 1863680}}, + "outputs": ["output"], + "control_flow": [ + {"ExecKernel": ["tvmgen_default_fused_nn_conv2d_add_kernel", [{"Ptr": "b3"}, {"Ptr": "input"}, {"Ptr": "p0"}, {"Ptr": "p1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 14}, + "grid_dim_z": {"Absolute": 32}, + "block_dim_x": {"Absolute": 14}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 2}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_concatenate_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p2"}, {"Ptr": "p3"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b6"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b6"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p4"}, {"Ptr": "p5"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b12"}, {"Ptr": "b3"}, {"Ptr": "p6"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p7"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b3"}, {"Ptr": "p8"}, {"Ptr": "p9"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "p10"}, {"Ptr": "p11"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b19"}, {"Ptr": "b12"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b19"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b19"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + 
"grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p13"}, {"Ptr": "p14"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p15"}, {"Ptr": "p16"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b6"}, {"Ptr": "p17"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b19"}, {"Ptr": "b12"}, {"Ptr": "p18"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b6"}, {"Ptr": "b19"}, 
{"Ptr": "p19"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p20"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b3"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b3"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p21"}, {"Ptr": "p22"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b12"}, {"Ptr": "b6"}, {"Ptr": "p23"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b12"}, {"Ptr": "p24"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p25"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b6"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p26"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b6"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p27"}, {"Ptr": "p28"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p29"}, {"Ptr": "p30"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p31"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p32"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p33"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p34"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b6"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b6"}, {"Ptr": "b7"}], { + 
"grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p35"}, {"Ptr": "p36"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p37"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p38"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p39"}], { + "grid_dim_x": 
{"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p40"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b3"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p41"}, {"Ptr": "p42"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p43"}, {"Ptr": "p44"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p45"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p46"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b6"}, {"Ptr": "b12"}, {"Ptr": "p47"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p48"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b3"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b3"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p49"}, {"Ptr": "p50"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p51"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p52"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p53"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b6"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p54"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b6"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p55"}, {"Ptr": "p56"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p57"}, {"Ptr": "p58"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p59"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p60"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p61"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b6"}, {"Ptr": 
"b3"}, {"Ptr": "p62"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b6"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b6"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p63"}, {"Ptr": "p64"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p65"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p66"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p67"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p68"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": 
{"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b3"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p69"}, {"Ptr": "p70"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p71"}, {"Ptr": "p72"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p73"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p74"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b6"}, {"Ptr": "b12"}, {"Ptr": "p75"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p76"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b3"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b3"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p77"}, {"Ptr": "p78"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p79"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p80"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p81"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b6"}, {"Ptr": "b19"}, 
{"Ptr": "p10"}, {"Ptr": "p82"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b6"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p83"}, {"Ptr": "p84"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p85"}, {"Ptr": "p86"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p87"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p88"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p89"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p90"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b6"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b6"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p91"}, {"Ptr": "p92"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p93"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p94"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p95"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p96"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, 
{"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b3"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p97"}, {"Ptr": "p98"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p99"}, {"Ptr": "p100"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p101"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p102"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b6"}, {"Ptr": "b12"}, {"Ptr": "p103"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p104"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_mean_kernel", [{"Ptr": "b3"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b3"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p105"}, {"Ptr": "p106"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p107"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": 
{"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p108"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p109"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b6"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p110"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b6"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p111"}, {"Ptr": "p112"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p113"}, {"Ptr": "p114"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p115"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p116"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p117"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p118"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b6"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b6"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": 
"b18"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p119"}, {"Ptr": "p120"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p121"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p122"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p123"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p124"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b3"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p125"}, {"Ptr": "p126"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + 
}]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p127"}, {"Ptr": "p128"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p129"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": 
"b19"}, {"Ptr": "p130"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b6"}, {"Ptr": "b12"}, {"Ptr": "p131"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p132"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b3"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b3"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p133"}, {"Ptr": "p134"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p135"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p136"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p137"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b6"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p138"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": 
"b189"}, {"Ptr": "b188"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b6"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p139"}, {"Ptr": "p140"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p141"}, {"Ptr": "p142"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p143"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p144"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p145"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p146"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b6"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 
1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b6"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p147"}, {"Ptr": "p148"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "p149"}], { + "grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p150"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, 
+ "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p151"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b3"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p152"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b3"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b3"}, 
{"Ptr": "b18"}, {"Ptr": "p153"}, {"Ptr": "p154"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b3"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p155"}, {"Ptr": "p156"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b3"}, {"Ptr": "p157"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p158"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b6"}, {"Ptr": "b12"}, {"Ptr": "p159"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b3"}, {"Ptr": "b6"}, {"Ptr": "p160"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b3"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b3"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b3"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p161"}, {"Ptr": "p162"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p163"}], { + 
"grid_dim_x": {"Absolute": 2304}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_reshape_expand_dims_transpose_squeeze_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p164"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_multiply_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p8"}, {"Ptr": "p165"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_multiply_reshape_transpose_kernel", [{"Ptr": "b6"}, {"Ptr": "b19"}, {"Ptr": "p10"}, {"Ptr": "p166"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_kernel", [{"Ptr": "b12"}, {"Ptr": "b18"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 197}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel", [{"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 
3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_1", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_2", [{"Ptr": "b189"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_softmax_kernel_3", [{"Ptr": "b189"}, {"Ptr": "b188"}, {"Ptr": "b6"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_take_reshape_transpose_reshape_transpose_kernel", [{"Ptr": "b18"}, {"Ptr": "b19"}, {"Ptr": "p12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_batch_matmul_1_kernel", [{"Ptr": "b12"}, {"Ptr": "b6"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 12}, + "block_dim_x": {"Absolute": 8}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_reshape_kernel", [{"Ptr": "b18"}, {"Ptr": "b12"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "p167"}, {"Ptr": "p168"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_transpose_add_kernel", [{"Ptr": "b18"}, {"Ptr": "b6"}, {"Ptr": "b3"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b18"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b18"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, 
+ "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_reshape_kernel", [{"Ptr": "b6"}, {"Ptr": "b18"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p169"}, {"Ptr": "p170"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_1_kernel", [{"Ptr": "b19"}, {"Ptr": "b6"}, {"Ptr": "p171"}], { + "grid_dim_x": {"Absolute": 3072}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_divide_erf_add_multiply_multiply_reshape_kernel", [{"Ptr": "b12"}, {"Ptr": "b19"}, {"Ptr": "p172"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_2_kernel", [{"Ptr": "b3"}, {"Ptr": "b12"}, {"Ptr": "p173"}], { + "grid_dim_x": {"Absolute": 768}, + "grid_dim_y": {"Absolute": 197}, + "grid_dim_z": {"Absolute": 1}, 
+ "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_reshape_add_add_kernel", [{"Ptr": "b6"}, {"Ptr": "b3"}, {"Ptr": "p174"}, {"Ptr": "b18"}], { + "grid_dim_x": {"Absolute": 148}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel", [{"Ptr": "b6"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_mean_kernel_1", [{"Ptr": "b7"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel", [{"Ptr": "b188"}, {"Ptr": "b6"}, {"Ptr": "b7"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_variance_kernel_1", [{"Ptr": "output"}, {"Ptr": "b188"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 197}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["tvmgen_default_fused_subtract_add_rsqrt_multiply_multiply_add_take_kernel", [{"Ptr": "b185"}, {"Ptr": "b6"}, {"Ptr": "b7"}, {"Ptr": "output"}, {"Ptr": "p175"}, {"Ptr": "p176"}, {"Ptr": "p8"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 768}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["tvmgen_default_fused_nn_dense_add_1_kernel", [{"Ptr": "output"}, {"Ptr": "b185"}, {"Ptr": "p177"}, {"Ptr": "p178"}], { + "grid_dim_x": {"Absolute": 1000}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_alexnet.json b/machine_interface/tests/data/hip/test_gpu_alexnet.json new file mode 100644 index 00000000..700593d0 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_alexnet.json @@ -0,0 +1,134 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_relu_0.hsaco", "path": "hip/alexnet/tem_fused_conv2d_relu_0.hsaco"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_1.hsaco", "path": "hip/alexnet/poi_fused_conv2d_max_pool2d_relu_1.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_2.hsaco", "path": "hip/alexnet/tem_fused_conv2d_relu_2.hsaco"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_3.hsaco", "path": "hip/alexnet/poi_fused_conv2d_max_pool2d_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_4.hsaco", "path": "hip/alexnet/tem_fused_conv2d_relu_4.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_5.hsaco", "path": "hip/alexnet/tem_fused_conv2d_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_6.hsaco", "path": "hip/alexnet/tem_fused_conv2d_relu_6.hsaco"}, + {"module_name": 
"poi_fused_adaptive_avg_pool2d_conv2d_max_pool2d_relu_7.hsaco", "path": "hip/alexnet/poi_fused_adaptive_avg_pool2d_conv2d_max_pool2d_relu_7.hsaco"}, + {"module_name": "tem_fused_relu_8.hsaco", "path": "hip/alexnet/tem_fused_relu_8.hsaco"}, + {"module_name": "tem_fused_relu_9.hsaco", "path": "hip/alexnet/tem_fused_relu_9.hsaco"}, + {"module_name": "tem_fused_linear_relu_10.hsaco", "path": "hip/alexnet/tem_fused_linear_relu_10.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_relu_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "tem_fused_conv2d_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "tem_fused_conv2d_relu_5.hsaco", "kernel_name": "kern005"}, + {"module_name": "tem_fused_conv2d_relu_6.hsaco", "kernel_name": "kern006"}, + {"module_name": "poi_fused_adaptive_avg_pool2d_conv2d_max_pool2d_relu_7.hsaco", "kernel_name": "kern007"}, + {"module_name": "tem_fused_relu_8.hsaco", "kernel_name": "kern008"}, + {"module_name": "tem_fused_relu_9.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_linear_relu_10.hsaco", "kernel_name": "kern010"} + ], + "blueprint": { + "inputs": ["arg16_1", "features_0_weight", "features_0_bias", "var_6", "features_3_weight", "features_3_bias", "var_13", "features_6_weight", "features_6_bias", "features_8_weight", "features_8_bias", "features_10_weight", "features_10_bias", "var_28", "classifier_1_weight", "classifier_1_bias", "classifier_4_weight", "classifier_4_bias", "classifier_6_bias", "classifier_6_weight"], + "buffers": {"buf1": {"Absolute": 802816}, "buf2": {"Absolute": 186624}, "buf4": {"Absolute": 559872}, "buf5": {"Absolute": 129792}, "buf7": {"Absolute": 259584}, "buf9": {"Absolute": 173056}, "buf11": {"Absolute": 173056}, "buf12": 
{"Absolute": 36864}, "buf15": {"Absolute": 16384}, "buf17": {"Absolute": 16384}, "buf18": {"Absolute": 40}}, + "outputs": ["buf18"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg16_1"}, {"Ptr": "features_0_weight"}, {"Ptr": "features_0_bias"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf1"}, {"Ptr": "buf2"}, {"Ptr": "var_6"}], { + "grid_dim_x": {"Absolute": 183}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "features_3_weight"}, {"Ptr": "features_3_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 12}, + "grid_dim_y": {"Absolute": 3}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "buf5"}, {"Ptr": "var_13"}], { + "grid_dim_x": {"Absolute": 254}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf5"}, {"Ptr": "features_6_weight"}, {"Ptr": "features_6_bias"}, {"Ptr": "buf7"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 24}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": 
"buf7"}, {"Ptr": "features_8_weight"}, {"Ptr": "features_8_bias"}, {"Ptr": "buf9"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf9"}, {"Ptr": "features_10_weight"}, {"Ptr": "features_10_bias"}, {"Ptr": "buf11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern007", [{"Ptr": "buf12"}, {"Ptr": "buf11"}, {"Ptr": "var_28"}], { + "grid_dim_x": {"Absolute": 72}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf12"}, {"Ptr": "classifier_1_weight"}, {"Ptr": "classifier_1_bias"}, {"Ptr": "buf15"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 20480} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf15"}, {"Ptr": "classifier_4_weight"}, {"Ptr": "classifier_4_bias"}, {"Ptr": "buf17"}], { + "grid_dim_x": {"Absolute": 64}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 20480} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "classifier_6_bias"}, {"Ptr": "buf17"}, {"Ptr": "classifier_6_weight"}, {"Ptr": "buf18"}], { + "grid_dim_x": {"Absolute": 1}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_basic_io.json b/machine_interface/tests/data/hip/test_gpu_basic_io.json new file mode 100644 index 00000000..c326ed39 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_basic_io.json @@ -0,0 +1,20 @@ +{ + "modules": [ { "module_name": "module.hsaco", "path": "module.hsaco" } ], + "kernels": [ { "module_name": "module.hsaco", "kernel_name": "check_then_write" } ], + "blueprint": { + "inputs": ["A"], + "buffers": {}, + "outputs": ["A"], + "control_flow": [ + {"ExecKernel": ["check_then_write", [{"Ptr": "A"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_batch_norm.json b/machine_interface/tests/data/hip/test_gpu_batch_norm.json new file mode 100644 index 00000000..aa54671e --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_batch_norm.json @@ -0,0 +1,57 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "path": "hip/batch_norm/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco"}, + {"module_name": "poi_fused_avg_pool2d_conv2d_max_pool2d_miopen_batch_norm_relu_1.hsaco", "path": "hip/batch_norm/poi_fused_avg_pool2d_conv2d_max_pool2d_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_2.hsaco", "path": "hip/batch_norm/poi_fused_2.hsaco"}, + {"module_name": "tem_fused_linear_3.hsaco", "path": "hip/batch_norm/tem_fused_linear_3.hsaco"} + ], + "kernels": [ + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_avg_pool2d_conv2d_max_pool2d_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_linear_3.hsaco", "kernel_name": "kern003"} + ], + "blueprint": { + "inputs": ["arg9_1", "conv_weight", "conv_bias", "bn_running_mean", "bn_running_var", "bn_weight", "bn_bias", "var_10", "var_12", "fc_weight"], + "buffers": {"buf1": {"Absolute": 6400}, "buf2": {"Absolute": 1024}, "buf4": {"Absolute": 12}, "buf5": {"Absolute": 12}}, + "outputs": ["buf5"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg9_1"}, {"Ptr": "conv_weight"}, {"Ptr": "conv_bias"}, {"Ptr": "bn_running_mean"}, {"Ptr": "bn_running_var"}, {"Ptr": "bn_weight"}, {"Ptr": "bn_bias"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 2560} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf2"}, {"Ptr": "buf1"}, {"Ptr": "var_10"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf4"}, {"Ptr": "var_12"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "buf2"}, {"Ptr": "fc_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_double_matmul.json b/machine_interface/tests/data/hip/test_gpu_double_matmul.json new file mode 100644 index 00000000..38bd15da --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_double_matmul.json @@ -0,0 +1,35 @@ +{ + "modules": [ + {"module_name": "tem_fused_linear_0.hsaco", "path": "hip/double_matmul/tem_fused_linear_0.hsaco"}, + {"module_name": "tem_fused_linear_1.hsaco", "path": "hip/double_matmul/tem_fused_linear_1.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_linear_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "tem_fused_linear_1.hsaco", "kernel_name": "kern001"} + ], + "blueprint": { + "inputs": ["arg2_1", "linear1_weight", "linear2_weight"], + "buffers": {"buf0": {"Absolute": 80}, "buf1": {"Absolute": 48}}, + "outputs": ["buf1"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg2_1"}, {"Ptr": "linear1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 1024} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "linear2_weight"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 1024} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_first_double_matmul.json b/machine_interface/tests/data/hip/test_gpu_first_double_matmul.json new file mode 100644 index 00000000..7d2e9661 --- /dev/null +++ 
b/machine_interface/tests/data/hip/test_gpu_first_double_matmul.json @@ -0,0 +1,24 @@ +{ + "modules": [ + { "module_name": "tem_fused_linear_0.hsaco", "path": "hip/double_matmul/tem_fused_linear_0.hsaco" } + ], + "kernels": [ + { "module_name": "tem_fused_linear_0.hsaco", "kernel_name": "kern000" } + ], + "blueprint": { + "inputs": ["A", "W"], + "buffers": {"B": {"Absolute": 80}}, + "outputs": ["B"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "A"}, {"Ptr": "W"}, {"Ptr": "B"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 1024} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_full_double_matmul.json b/machine_interface/tests/data/hip/test_gpu_full_double_matmul.json new file mode 100644 index 00000000..38bd15da --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_full_double_matmul.json @@ -0,0 +1,35 @@ +{ + "modules": [ + {"module_name": "tem_fused_linear_0.hsaco", "path": "hip/double_matmul/tem_fused_linear_0.hsaco"}, + {"module_name": "tem_fused_linear_1.hsaco", "path": "hip/double_matmul/tem_fused_linear_1.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_linear_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "tem_fused_linear_1.hsaco", "kernel_name": "kern001"} + ], + "blueprint": { + "inputs": ["arg2_1", "linear1_weight", "linear2_weight"], + "buffers": {"buf0": {"Absolute": 80}, "buf1": {"Absolute": 48}}, + "outputs": ["buf1"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg2_1"}, {"Ptr": "linear1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 1024} + 
}]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "linear2_weight"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 1024} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_hello_world.json b/machine_interface/tests/data/hip/test_gpu_hello_world.json new file mode 100644 index 00000000..17f551ae --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_hello_world.json @@ -0,0 +1,20 @@ +{ + "modules": [ { "module_name": "module.hsaco", "path": "module.hsaco" } ], + "kernels": [ { "module_name": "module.hsaco", "kernel_name": "hello_world" } ], + "blueprint": { + "inputs": [], + "buffers": {}, + "outputs": [], + "control_flow": [ + {"ExecKernel": ["hello_world", [], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_inference.json b/machine_interface/tests/data/hip/test_gpu_inference.json new file mode 100644 index 00000000..1feea9aa --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_inference.json @@ -0,0 +1,55 @@ +{ + "modules": [ { "module_name": "mlops.hsaco", "path": "mlops.hsaco" }, { "module_name": "module.hsaco", "path": "module.hsaco" } ], + "kernels": [ + { "module_name": "mlops.hsaco", "kernel_name": "matmul_f" }, + { "module_name": "mlops.hsaco", "kernel_name": "convolution_f" }, + { "module_name": "mlops.hsaco", "kernel_name": "relu_2d_f" }, + { "module_name": "mlops.hsaco", "kernel_name": "maxpool_f" }, + { "module_name": "module.hsaco", "kernel_name": "hello_world" } + ], + "blueprint": { 
+ "inputs": ["cfg", "A", "B"], + "buffers": {"C": {"Sizeof": "A"}, "D": {"FromInput": {"bufname": "cfg", "idx": 0}}}, + "outputs": ["D"], + "control_flow": [ + {"Repeat": [{"FromInput": {"bufname": "cfg", "idx": 3}}, [ + {"ExecKernel": ["hello_world", [], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["convolution_f", [{"Ptr": "C"}, {"Ptr": "A"}, {"Ptr": "B"}], { + "grid_dim_x": {"FromInput": {"bufname": "cfg", "idx": 1}}, + "grid_dim_y": {"FromInput": {"bufname": "cfg", "idx": 1}}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["relu_2d_f", [{"Ptr": "C"}], { + "grid_dim_x": {"FromInput": {"bufname": "cfg", "idx": 1}}, + "grid_dim_y": {"FromInput": {"bufname": "cfg", "idx": 1}}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["maxpool_f", [{"Ptr": "D"}, {"Ptr": "C"}, {"Constant": 2}], { + "grid_dim_x": {"FromInput": {"bufname": "cfg", "idx": 2}}, + "grid_dim_y": {"FromInput": {"bufname": "cfg", "idx": 2}}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}] + ]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_inference_batched.json b/machine_interface/tests/data/hip/test_gpu_inference_batched.json new file mode 100644 index 00000000..705a6574 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_inference_batched.json @@ -0,0 +1,44 @@ +{ + "modules": [ { "module_name": 
"mlops.hsaco", "path": "mlops.hsaco" } ], + "kernels": [ + { "module_name": "mlops.hsaco", "kernel_name": "convolution_f_batched" }, + { "module_name": "mlops.hsaco", "kernel_name": "relu_2d_f_batched" }, + { "module_name": "mlops.hsaco", "kernel_name": "maxpool_f_batched" } + ], + "blueprint": { + "inputs": ["cfg", "A", "B"], + "buffers": {"C": {"Sizeof": "A"}, "D": {"FromInput": {"bufname": "cfg", "idx": 0}}}, + "outputs": ["D"], + "control_flow": [ + {"Repeat": [{"FromInput": {"bufname": "cfg", "idx": 3}}, [ + {"ExecKernel": ["convolution_f_batched", [{"Ptr": "C"}, {"Ptr": "A"}, {"Ptr": "B"}, {"Ptr": "cfg"}], { + "grid_dim_x": {"FromInput": {"bufname": "cfg", "idx": 1}}, + "grid_dim_y": {"FromInput": {"bufname": "cfg", "idx": 1}}, + "grid_dim_z": {"FromInput": {"bufname": "cfg", "idx": 4}}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["relu_2d_f_batched", [{"Ptr": "C"}, {"Ptr": "cfg"}], { + "grid_dim_x": {"FromInput": {"bufname": "cfg", "idx": 1}}, + "grid_dim_y": {"FromInput": {"bufname": "cfg", "idx": 1}}, + "grid_dim_z": {"FromInput": {"bufname": "cfg", "idx": 4}}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["maxpool_f_batched", [{"Ptr": "D"}, {"Ptr": "C"}, {"Constant": 2}, {"Ptr": "cfg"}], { + "grid_dim_x": {"FromInput": {"bufname": "cfg", "idx": 2}}, + "grid_dim_y": {"FromInput": {"bufname": "cfg", "idx": 2}}, + "grid_dim_z": {"FromInput": {"bufname": "cfg", "idx": 4}}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}] + ]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_lenet5.json b/machine_interface/tests/data/hip/test_gpu_lenet5.json new file mode 100644 index 
00000000..50a875b2 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_lenet5.json @@ -0,0 +1,101 @@ +{ + "modules": [ + {"module_name": "poi_fused_conv2d_0.hsaco", "path": "hip/lenet5/poi_fused_conv2d_0.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_1.hsaco", "path": "hip/lenet5/tem_fused_conv2d_relu_1.hsaco"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_2.hsaco", "path": "hip/lenet5/poi_fused_conv2d_max_pool2d_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_max_pool2d_relu_3.hsaco", "path": "hip/lenet5/tem_fused_conv2d_max_pool2d_relu_3.hsaco"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_4.hsaco", "path": "hip/lenet5/poi_fused_conv2d_max_pool2d_relu_4.hsaco"}, + {"module_name": "tem_fused_relu_5.hsaco", "path": "hip/lenet5/tem_fused_relu_5.hsaco"}, + {"module_name": "tem_fused_relu_6.hsaco", "path": "hip/lenet5/tem_fused_relu_6.hsaco"}, + {"module_name": "tem_fused_linear_relu_7.hsaco", "path": "hip/lenet5/tem_fused_linear_relu_7.hsaco"} + ], + "kernels": [ + {"module_name": "poi_fused_conv2d_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "tem_fused_conv2d_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_max_pool2d_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "tem_fused_relu_5.hsaco", "kernel_name": "kern005"}, + {"module_name": "tem_fused_relu_6.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_linear_relu_7.hsaco", "kernel_name": "kern007"} + ], + "blueprint": { + "inputs": ["var_1", "arg10_1", "conv1_weight", "var_8", "conv2_weight", "conv2_bias", "var_15", "fc1_weight", "fc1_bias", "fc2_weight", "fc2_bias", "fc3_bias", "fc3_weight"], + "buffers": {"buf0": {"Absolute": 24}, "buf2": {"Absolute": 18816}, "buf3": {"Absolute": 4704}, "buf5": {"Absolute": 6400}, "buf6": {"Absolute": 1600}, 
"buf8": {"Absolute": 480}, "buf10": {"Absolute": 336}, "buf11": {"Absolute": 40}}, + "outputs": ["buf11"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "buf0"}, {"Ptr": "var_1"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "arg10_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "buf3"}, {"Ptr": "var_8"}], { + "grid_dim_x": {"Absolute": 10}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf3"}, {"Ptr": "conv2_weight"}, {"Ptr": "conv2_bias"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf5"}, {"Ptr": "buf6"}, {"Ptr": "var_15"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": "buf6"}, {"Ptr": "fc1_weight"}, {"Ptr": "fc1_bias"}, {"Ptr": "buf8"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf8"}, {"Ptr": "fc2_weight"}, {"Ptr": "fc2_bias"}, {"Ptr": "buf10"}], { + "grid_dim_x": {"Absolute": 3}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern007", [{"Ptr": "fc3_bias"}, {"Ptr": "buf10"}, {"Ptr": "fc3_weight"}, {"Ptr": "buf11"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_matmul_loop.json b/machine_interface/tests/data/hip/test_gpu_matmul_loop.json new file mode 100644 index 00000000..80950aee --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_matmul_loop.json @@ -0,0 +1,20 @@ +{ + "modules": [ { "module_name": "mlops.hsaco", "path": "mlops.hsaco" } ], + "kernels": [ { "module_name": "mlops.hsaco", "kernel_name": "matmul_loop" } ], + "blueprint": { + "inputs": ["A"], + "buffers": {"B": {"Sizeof": "A"}}, + "outputs": ["B"], + "control_flow": [ + {"ExecKernel": ["matmul_loop", [{"Ptr": "A"}, {"Sizeof": "A"}, {"Ptr": "B"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_matmul_para.json 
b/machine_interface/tests/data/hip/test_gpu_matmul_para.json new file mode 100644 index 00000000..eb27a64e --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_matmul_para.json @@ -0,0 +1,20 @@ +{ + "modules": [ { "module_name": "mlops.hsaco", "path": "mlops.hsaco" } ], + "kernels": [ { "module_name": "mlops.hsaco", "kernel_name": "matmul_para" } ], + "blueprint": { + "inputs": ["cfg", "A"], + "buffers": {"B": {"Sizeof": "A"}}, + "outputs": ["B"], + "control_flow": [ + {"ExecKernel": ["matmul_para", [{"Ptr": "A"}, {"Sizeof": "A"}, {"Ptr": "B"}], { + "grid_dim_x": {"FromInput": {"bufname": "cfg", "idx": 0}}, + "grid_dim_y": {"FromInput": {"bufname": "cfg", "idx": 0}}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 32}, + "block_dim_y": {"Absolute": 32}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_minimal.json b/machine_interface/tests/data/hip/test_gpu_minimal.json new file mode 100644 index 00000000..57253320 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_minimal.json @@ -0,0 +1,24 @@ +{ + "modules": [ { "module_name": "module.hsaco", "path": "module.hsaco" } ], + "kernels": [ + { "module_name": "module.hsaco", "kernel_name": "set_mem" }, + { "module_name": "module.hsaco", "kernel_name": "check_mem" }, + { "module_name": "module.hsaco", "kernel_name": "hello_world" } + ], + "blueprint": { + "inputs": [], + "buffers": {"A": {"Absolute": 1024}}, + "outputs": [], + "control_flow": [ + {"ExecKernel": ["set_mem", [{"Ptr": "A"}, {"Sizeof": "A"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 1024}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet152.json 
b/machine_interface/tests/data/hip/test_gpu_resnet152.json new file mode 100644 index 00000000..5b0b6a73 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet152.json @@ -0,0 +1,1812 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet152/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco", "path": "hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco", "path": "hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco"}, + {"module_name": "tem_fused_conv2d_24.hsaco", "path": "hip/resnet152/tem_fused_conv2d_24.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_25.hsaco", "path": "hip/resnet152/poi_fused_add_miopen_batch_norm_relu_25.hsaco"}, + 
{"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco", "path": "hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco", "path": "hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_28.hsaco", "path": "hip/resnet152/poi_fused_add_miopen_batch_norm_relu_28.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco", "path": "hip/resnet152/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco"}, + {"module_name": "tem_fused_linear_30.hsaco", "path": "hip/resnet152/tem_fused_linear_30.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern005"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern008"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern014"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern015"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern016"}, + 
{"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern017"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern018"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern037"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern038"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern039"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern040"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern041"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern043"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern045"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco", "kernel_name": "kern182"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco", "kernel_name": "kern183"}, + {"module_name": "tem_fused_conv2d_24.hsaco", "kernel_name": "kern184"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_25.hsaco", "kernel_name": "kern185"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco", "kernel_name": "kern186"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco", "kernel_name": "kern187"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_28.hsaco", "kernel_name": "kern188"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco", "kernel_name": "kern191"}, + {"module_name": "tem_fused_linear_30.hsaco", "kernel_name": "kern192"} + ], + "blueprint": { + "inputs": ["arg932_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_9", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", 
"layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "layer1_0_conv3_weight", "layer1_0_downsample_0_weight", "layer1_0_bn3_running_mean", "layer1_0_bn3_running_var", "layer1_0_bn3_weight", "layer1_0_bn3_bias", "layer1_0_downsample_1_running_mean", "layer1_0_downsample_1_running_var", "layer1_0_downsample_1_weight", "layer1_0_downsample_1_bias", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "layer1_1_conv3_weight", "layer1_1_bn3_running_mean", "layer1_1_bn3_running_var", "layer1_1_bn3_weight", "layer1_1_bn3_bias", "layer1_2_conv1_weight", "layer1_2_bn1_running_mean", "layer1_2_bn1_running_var", "layer1_2_bn1_weight", "layer1_2_bn1_bias", "layer1_2_conv2_weight", "layer1_2_bn2_running_mean", "layer1_2_bn2_running_var", "layer1_2_bn2_weight", "layer1_2_bn2_bias", "layer1_2_conv3_weight", "layer1_2_bn3_running_mean", "layer1_2_bn3_running_var", "layer1_2_bn3_weight", "layer1_2_bn3_bias", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_conv3_weight", "layer2_0_downsample_0_weight", "layer2_0_bn3_running_mean", "layer2_0_bn3_running_var", "layer2_0_bn3_weight", "layer2_0_bn3_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", 
"layer2_1_conv3_weight", "layer2_1_bn3_running_mean", "layer2_1_bn3_running_var", "layer2_1_bn3_weight", "layer2_1_bn3_bias", "layer2_2_conv1_weight", "layer2_2_bn1_running_mean", "layer2_2_bn1_running_var", "layer2_2_bn1_weight", "layer2_2_bn1_bias", "layer2_2_conv2_weight", "layer2_2_bn2_running_mean", "layer2_2_bn2_running_var", "layer2_2_bn2_weight", "layer2_2_bn2_bias", "layer2_2_conv3_weight", "layer2_2_bn3_running_mean", "layer2_2_bn3_running_var", "layer2_2_bn3_weight", "layer2_2_bn3_bias", "layer2_3_conv1_weight", "layer2_3_bn1_running_mean", "layer2_3_bn1_running_var", "layer2_3_bn1_weight", "layer2_3_bn1_bias", "layer2_3_conv2_weight", "layer2_3_bn2_running_mean", "layer2_3_bn2_running_var", "layer2_3_bn2_weight", "layer2_3_bn2_bias", "layer2_3_conv3_weight", "layer2_3_bn3_running_mean", "layer2_3_bn3_running_var", "layer2_3_bn3_weight", "layer2_3_bn3_bias", "layer2_4_conv1_weight", "layer2_4_bn1_running_mean", "layer2_4_bn1_running_var", "layer2_4_bn1_weight", "layer2_4_bn1_bias", "layer2_4_conv2_weight", "layer2_4_bn2_running_mean", "layer2_4_bn2_running_var", "layer2_4_bn2_weight", "layer2_4_bn2_bias", "layer2_4_conv3_weight", "layer2_4_bn3_running_mean", "layer2_4_bn3_running_var", "layer2_4_bn3_weight", "layer2_4_bn3_bias", "layer2_5_conv1_weight", "layer2_5_bn1_running_mean", "layer2_5_bn1_running_var", "layer2_5_bn1_weight", "layer2_5_bn1_bias", "layer2_5_conv2_weight", "layer2_5_bn2_running_mean", "layer2_5_bn2_running_var", "layer2_5_bn2_weight", "layer2_5_bn2_bias", "layer2_5_conv3_weight", "layer2_5_bn3_running_mean", "layer2_5_bn3_running_var", "layer2_5_bn3_weight", "layer2_5_bn3_bias", "layer2_6_conv1_weight", "layer2_6_bn1_running_mean", "layer2_6_bn1_running_var", "layer2_6_bn1_weight", "layer2_6_bn1_bias", "layer2_6_conv2_weight", "layer2_6_bn2_running_mean", "layer2_6_bn2_running_var", "layer2_6_bn2_weight", "layer2_6_bn2_bias", "layer2_6_conv3_weight", "layer2_6_bn3_running_mean", "layer2_6_bn3_running_var", "layer2_6_bn3_weight", 
"layer2_6_bn3_bias", "layer2_7_conv1_weight", "layer2_7_bn1_running_mean", "layer2_7_bn1_running_var", "layer2_7_bn1_weight", "layer2_7_bn1_bias", "layer2_7_conv2_weight", "layer2_7_bn2_running_mean", "layer2_7_bn2_running_var", "layer2_7_bn2_weight", "layer2_7_bn2_bias", "layer2_7_conv3_weight", "layer2_7_bn3_running_mean", "layer2_7_bn3_running_var", "layer2_7_bn3_weight", "layer2_7_bn3_bias", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "var_281", "layer3_0_conv3_weight", "layer3_0_downsample_0_weight", "layer3_0_bn3_running_mean", "layer3_0_bn3_running_var", "layer3_0_bn3_weight", "layer3_0_bn3_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_302", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_311", "layer3_1_conv3_weight", "layer3_1_bn3_running_mean", "layer3_1_bn3_running_var", "layer3_1_bn3_weight", "layer3_1_bn3_bias", "layer3_2_bn1_running_mean", "layer3_2_bn1_running_var", "layer3_2_bn1_weight", "layer3_2_bn1_bias", "var_325", "layer3_2_conv2_weight", "layer3_2_bn2_running_mean", "layer3_2_bn2_running_var", "layer3_2_bn2_weight", "layer3_2_bn2_bias", "var_334", "layer3_2_conv3_weight", "layer3_2_bn3_running_mean", "layer3_2_bn3_running_var", "layer3_2_bn3_weight", "layer3_2_bn3_bias", "layer3_3_bn1_running_mean", "layer3_3_bn1_running_var", "layer3_3_bn1_weight", "layer3_3_bn1_bias", "var_348", "layer3_3_conv2_weight", "layer3_3_bn2_running_mean", "layer3_3_bn2_running_var", "layer3_3_bn2_weight", "layer3_3_bn2_bias", "var_357", "layer3_3_conv3_weight", 
"layer3_3_bn3_running_mean", "layer3_3_bn3_running_var", "layer3_3_bn3_weight", "layer3_3_bn3_bias", "layer3_4_bn1_running_mean", "layer3_4_bn1_running_var", "layer3_4_bn1_weight", "layer3_4_bn1_bias", "var_371", "layer3_4_conv2_weight", "layer3_4_bn2_running_mean", "layer3_4_bn2_running_var", "layer3_4_bn2_weight", "layer3_4_bn2_bias", "var_380", "layer3_4_conv3_weight", "layer3_4_bn3_running_mean", "layer3_4_bn3_running_var", "layer3_4_bn3_weight", "layer3_4_bn3_bias", "layer3_5_bn1_running_mean", "layer3_5_bn1_running_var", "layer3_5_bn1_weight", "layer3_5_bn1_bias", "var_394", "layer3_5_conv2_weight", "layer3_5_bn2_running_mean", "layer3_5_bn2_running_var", "layer3_5_bn2_weight", "layer3_5_bn2_bias", "var_403", "layer3_5_conv3_weight", "layer3_5_bn3_running_mean", "layer3_5_bn3_running_var", "layer3_5_bn3_weight", "layer3_5_bn3_bias", "layer3_6_bn1_running_mean", "layer3_6_bn1_running_var", "layer3_6_bn1_weight", "layer3_6_bn1_bias", "var_417", "layer3_6_conv2_weight", "layer3_6_bn2_running_mean", "layer3_6_bn2_running_var", "layer3_6_bn2_weight", "layer3_6_bn2_bias", "var_426", "layer3_6_conv3_weight", "layer3_6_bn3_running_mean", "layer3_6_bn3_running_var", "layer3_6_bn3_weight", "layer3_6_bn3_bias", "layer3_7_bn1_running_mean", "layer3_7_bn1_running_var", "layer3_7_bn1_weight", "layer3_7_bn1_bias", "var_440", "layer3_7_conv2_weight", "layer3_7_bn2_running_mean", "layer3_7_bn2_running_var", "layer3_7_bn2_weight", "layer3_7_bn2_bias", "var_449", "layer3_7_conv3_weight", "layer3_7_bn3_running_mean", "layer3_7_bn3_running_var", "layer3_7_bn3_weight", "layer3_7_bn3_bias", "layer3_8_bn1_running_mean", "layer3_8_bn1_running_var", "layer3_8_bn1_weight", "layer3_8_bn1_bias", "var_463", "layer3_8_conv2_weight", "layer3_8_bn2_running_mean", "layer3_8_bn2_running_var", "layer3_8_bn2_weight", "layer3_8_bn2_bias", "var_472", "layer3_8_conv3_weight", "layer3_8_bn3_running_mean", "layer3_8_bn3_running_var", "layer3_8_bn3_weight", "layer3_8_bn3_bias", 
"layer3_9_bn1_running_mean", "layer3_9_bn1_running_var", "layer3_9_bn1_weight", "layer3_9_bn1_bias", "var_486", "layer3_9_conv2_weight", "layer3_9_bn2_running_mean", "layer3_9_bn2_running_var", "layer3_9_bn2_weight", "layer3_9_bn2_bias", "var_495", "layer3_9_conv3_weight", "layer3_9_bn3_running_mean", "layer3_9_bn3_running_var", "layer3_9_bn3_weight", "layer3_9_bn3_bias", "layer3_10_bn1_running_mean", "layer3_10_bn1_running_var", "layer3_10_bn1_weight", "layer3_10_bn1_bias", "var_509", "layer3_10_conv2_weight", "layer3_10_bn2_running_mean", "layer3_10_bn2_running_var", "layer3_10_bn2_weight", "layer3_10_bn2_bias", "var_518", "layer3_10_conv3_weight", "layer3_10_bn3_running_mean", "layer3_10_bn3_running_var", "layer3_10_bn3_weight", "layer3_10_bn3_bias", "layer3_11_bn1_running_mean", "layer3_11_bn1_running_var", "layer3_11_bn1_weight", "layer3_11_bn1_bias", "var_532", "layer3_11_conv2_weight", "layer3_11_bn2_running_mean", "layer3_11_bn2_running_var", "layer3_11_bn2_weight", "layer3_11_bn2_bias", "var_541", "layer3_11_conv3_weight", "layer3_11_bn3_running_mean", "layer3_11_bn3_running_var", "layer3_11_bn3_weight", "layer3_11_bn3_bias", "layer3_12_bn1_running_mean", "layer3_12_bn1_running_var", "layer3_12_bn1_weight", "layer3_12_bn1_bias", "var_555", "layer3_12_conv2_weight", "layer3_12_bn2_running_mean", "layer3_12_bn2_running_var", "layer3_12_bn2_weight", "layer3_12_bn2_bias", "var_564", "layer3_12_conv3_weight", "layer3_12_bn3_running_mean", "layer3_12_bn3_running_var", "layer3_12_bn3_weight", "layer3_12_bn3_bias", "layer3_13_bn1_running_mean", "layer3_13_bn1_running_var", "layer3_13_bn1_weight", "layer3_13_bn1_bias", "var_578", "layer3_13_conv2_weight", "layer3_13_bn2_running_mean", "layer3_13_bn2_running_var", "layer3_13_bn2_weight", "layer3_13_bn2_bias", "var_587", "layer3_13_conv3_weight", "layer3_13_bn3_running_mean", "layer3_13_bn3_running_var", "layer3_13_bn3_weight", "layer3_13_bn3_bias", "layer3_14_bn1_running_mean", "layer3_14_bn1_running_var", 
"layer3_14_bn1_weight", "layer3_14_bn1_bias", "var_601", "layer3_14_conv2_weight", "layer3_14_bn2_running_mean", "layer3_14_bn2_running_var", "layer3_14_bn2_weight", "layer3_14_bn2_bias", "var_610", "layer3_14_conv3_weight", "layer3_14_bn3_running_mean", "layer3_14_bn3_running_var", "layer3_14_bn3_weight", "layer3_14_bn3_bias", "layer3_15_bn1_running_mean", "layer3_15_bn1_running_var", "layer3_15_bn1_weight", "layer3_15_bn1_bias", "var_624", "layer3_15_conv2_weight", "layer3_15_bn2_running_mean", "layer3_15_bn2_running_var", "layer3_15_bn2_weight", "layer3_15_bn2_bias", "var_633", "layer3_15_conv3_weight", "layer3_15_bn3_running_mean", "layer3_15_bn3_running_var", "layer3_15_bn3_weight", "layer3_15_bn3_bias", "layer3_16_bn1_running_mean", "layer3_16_bn1_running_var", "layer3_16_bn1_weight", "layer3_16_bn1_bias", "var_647", "layer3_16_conv2_weight", "layer3_16_bn2_running_mean", "layer3_16_bn2_running_var", "layer3_16_bn2_weight", "layer3_16_bn2_bias", "var_656", "layer3_16_conv3_weight", "layer3_16_bn3_running_mean", "layer3_16_bn3_running_var", "layer3_16_bn3_weight", "layer3_16_bn3_bias", "layer3_17_bn1_running_mean", "layer3_17_bn1_running_var", "layer3_17_bn1_weight", "layer3_17_bn1_bias", "var_670", "layer3_17_conv2_weight", "layer3_17_bn2_running_mean", "layer3_17_bn2_running_var", "layer3_17_bn2_weight", "layer3_17_bn2_bias", "var_679", "layer3_17_conv3_weight", "layer3_17_bn3_running_mean", "layer3_17_bn3_running_var", "layer3_17_bn3_weight", "layer3_17_bn3_bias", "layer3_18_bn1_running_mean", "layer3_18_bn1_running_var", "layer3_18_bn1_weight", "layer3_18_bn1_bias", "var_693", "layer3_18_conv2_weight", "layer3_18_bn2_running_mean", "layer3_18_bn2_running_var", "layer3_18_bn2_weight", "layer3_18_bn2_bias", "var_702", "layer3_18_conv3_weight", "layer3_18_bn3_running_mean", "layer3_18_bn3_running_var", "layer3_18_bn3_weight", "layer3_18_bn3_bias", "layer3_19_bn1_running_mean", "layer3_19_bn1_running_var", "layer3_19_bn1_weight", "layer3_19_bn1_bias", 
"var_716", "layer3_19_conv2_weight", "layer3_19_bn2_running_mean", "layer3_19_bn2_running_var", "layer3_19_bn2_weight", "layer3_19_bn2_bias", "var_725", "layer3_19_conv3_weight", "layer3_19_bn3_running_mean", "layer3_19_bn3_running_var", "layer3_19_bn3_weight", "layer3_19_bn3_bias", "layer3_20_bn1_running_mean", "layer3_20_bn1_running_var", "layer3_20_bn1_weight", "layer3_20_bn1_bias", "var_739", "layer3_20_conv2_weight", "layer3_20_bn2_running_mean", "layer3_20_bn2_running_var", "layer3_20_bn2_weight", "layer3_20_bn2_bias", "var_748", "layer3_20_conv3_weight", "layer3_20_bn3_running_mean", "layer3_20_bn3_running_var", "layer3_20_bn3_weight", "layer3_20_bn3_bias", "layer3_21_bn1_running_mean", "layer3_21_bn1_running_var", "layer3_21_bn1_weight", "layer3_21_bn1_bias", "var_762", "layer3_21_conv2_weight", "layer3_21_bn2_running_mean", "layer3_21_bn2_running_var", "layer3_21_bn2_weight", "layer3_21_bn2_bias", "var_771", "layer3_21_conv3_weight", "layer3_21_bn3_running_mean", "layer3_21_bn3_running_var", "layer3_21_bn3_weight", "layer3_21_bn3_bias", "layer3_22_bn1_running_mean", "layer3_22_bn1_running_var", "layer3_22_bn1_weight", "layer3_22_bn1_bias", "var_785", "layer3_22_conv2_weight", "layer3_22_bn2_running_mean", "layer3_22_bn2_running_var", "layer3_22_bn2_weight", "layer3_22_bn2_bias", "var_794", "layer3_22_conv3_weight", "layer3_22_bn3_running_mean", "layer3_22_bn3_running_var", "layer3_22_bn3_weight", "layer3_22_bn3_bias", "layer3_23_bn1_running_mean", "layer3_23_bn1_running_var", "layer3_23_bn1_weight", "layer3_23_bn1_bias", "var_808", "layer3_23_conv2_weight", "layer3_23_bn2_running_mean", "layer3_23_bn2_running_var", "layer3_23_bn2_weight", "layer3_23_bn2_bias", "var_817", "layer3_23_conv3_weight", "layer3_23_bn3_running_mean", "layer3_23_bn3_running_var", "layer3_23_bn3_weight", "layer3_23_bn3_bias", "layer3_24_bn1_running_mean", "layer3_24_bn1_running_var", "layer3_24_bn1_weight", "layer3_24_bn1_bias", "var_831", "layer3_24_conv2_weight", 
"layer3_24_bn2_running_mean", "layer3_24_bn2_running_var", "layer3_24_bn2_weight", "layer3_24_bn2_bias", "var_840", "layer3_24_conv3_weight", "layer3_24_bn3_running_mean", "layer3_24_bn3_running_var", "layer3_24_bn3_weight", "layer3_24_bn3_bias", "layer3_25_bn1_running_mean", "layer3_25_bn1_running_var", "layer3_25_bn1_weight", "layer3_25_bn1_bias", "var_854", "layer3_25_conv2_weight", "layer3_25_bn2_running_mean", "layer3_25_bn2_running_var", "layer3_25_bn2_weight", "layer3_25_bn2_bias", "var_863", "layer3_25_conv3_weight", "layer3_25_bn3_running_mean", "layer3_25_bn3_running_var", "layer3_25_bn3_weight", "layer3_25_bn3_bias", "layer3_26_bn1_running_mean", "layer3_26_bn1_running_var", "layer3_26_bn1_weight", "layer3_26_bn1_bias", "var_877", "layer3_26_conv2_weight", "layer3_26_bn2_running_mean", "layer3_26_bn2_running_var", "layer3_26_bn2_weight", "layer3_26_bn2_bias", "var_886", "layer3_26_conv3_weight", "layer3_26_bn3_running_mean", "layer3_26_bn3_running_var", "layer3_26_bn3_weight", "layer3_26_bn3_bias", "layer3_27_bn1_running_mean", "layer3_27_bn1_running_var", "layer3_27_bn1_weight", "layer3_27_bn1_bias", "var_900", "layer3_27_conv2_weight", "layer3_27_bn2_running_mean", "layer3_27_bn2_running_var", "layer3_27_bn2_weight", "layer3_27_bn2_bias", "var_909", "layer3_27_conv3_weight", "layer3_27_bn3_running_mean", "layer3_27_bn3_running_var", "layer3_27_bn3_weight", "layer3_27_bn3_bias", "layer3_28_bn1_running_mean", "layer3_28_bn1_running_var", "layer3_28_bn1_weight", "layer3_28_bn1_bias", "var_923", "layer3_28_conv2_weight", "layer3_28_bn2_running_mean", "layer3_28_bn2_running_var", "layer3_28_bn2_weight", "layer3_28_bn2_bias", "var_932", "layer3_28_conv3_weight", "layer3_28_bn3_running_mean", "layer3_28_bn3_running_var", "layer3_28_bn3_weight", "layer3_28_bn3_bias", "layer3_29_bn1_running_mean", "layer3_29_bn1_running_var", "layer3_29_bn1_weight", "layer3_29_bn1_bias", "var_946", "layer3_29_conv2_weight", "layer3_29_bn2_running_mean", 
"layer3_29_bn2_running_var", "layer3_29_bn2_weight", "layer3_29_bn2_bias", "var_955", "layer3_29_conv3_weight", "layer3_29_bn3_running_mean", "layer3_29_bn3_running_var", "layer3_29_bn3_weight", "layer3_29_bn3_bias", "layer3_30_bn1_running_mean", "layer3_30_bn1_running_var", "layer3_30_bn1_weight", "layer3_30_bn1_bias", "var_969", "layer3_30_conv2_weight", "layer3_30_bn2_running_mean", "layer3_30_bn2_running_var", "layer3_30_bn2_weight", "layer3_30_bn2_bias", "var_978", "layer3_30_conv3_weight", "layer3_30_bn3_running_mean", "layer3_30_bn3_running_var", "layer3_30_bn3_weight", "layer3_30_bn3_bias", "layer3_31_bn1_running_mean", "layer3_31_bn1_running_var", "layer3_31_bn1_weight", "layer3_31_bn1_bias", "var_992", "layer3_31_conv2_weight", "layer3_31_bn2_running_mean", "layer3_31_bn2_running_var", "layer3_31_bn2_weight", "layer3_31_bn2_bias", "var_1001", "layer3_31_conv3_weight", "layer3_31_bn3_running_mean", "layer3_31_bn3_running_var", "layer3_31_bn3_weight", "layer3_31_bn3_bias", "layer3_32_bn1_running_mean", "layer3_32_bn1_running_var", "layer3_32_bn1_weight", "layer3_32_bn1_bias", "var_1015", "layer3_32_conv2_weight", "layer3_32_bn2_running_mean", "layer3_32_bn2_running_var", "layer3_32_bn2_weight", "layer3_32_bn2_bias", "var_1024", "layer3_32_conv3_weight", "layer3_32_bn3_running_mean", "layer3_32_bn3_running_var", "layer3_32_bn3_weight", "layer3_32_bn3_bias", "layer3_33_bn1_running_mean", "layer3_33_bn1_running_var", "layer3_33_bn1_weight", "layer3_33_bn1_bias", "var_1038", "layer3_33_conv2_weight", "layer3_33_bn2_running_mean", "layer3_33_bn2_running_var", "layer3_33_bn2_weight", "layer3_33_bn2_bias", "var_1047", "layer3_33_conv3_weight", "layer3_33_bn3_running_mean", "layer3_33_bn3_running_var", "layer3_33_bn3_weight", "layer3_33_bn3_bias", "layer3_34_bn1_running_mean", "layer3_34_bn1_running_var", "layer3_34_bn1_weight", "layer3_34_bn1_bias", "var_1061", "layer3_34_conv2_weight", "layer3_34_bn2_running_mean", "layer3_34_bn2_running_var", 
"layer3_34_bn2_weight", "layer3_34_bn2_bias", "var_1070", "layer3_34_conv3_weight", "layer3_34_bn3_running_mean", "layer3_34_bn3_running_var", "layer3_34_bn3_weight", "layer3_34_bn3_bias", "layer3_35_bn1_running_mean", "layer3_35_bn1_running_var", "layer3_35_bn1_weight", "layer3_35_bn1_bias", "var_1084", "layer3_35_conv2_weight", "layer3_35_bn2_running_mean", "layer3_35_bn2_running_var", "layer3_35_bn2_weight", "layer3_35_bn2_bias", "var_1093", "layer3_35_conv3_weight", "layer3_35_bn3_running_mean", "layer3_35_bn3_running_var", "layer3_35_bn3_weight", "layer3_35_bn3_bias", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "var_1107", "layer4_0_conv2_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_0_weight", "layer4_0_bn3_running_mean", "layer4_0_bn3_running_var", "layer4_0_bn3_weight", "layer4_0_bn3_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "var_1128", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "var_1134", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "layer4_1_bn3_running_mean", "layer4_1_bn3_running_var", "layer4_1_bn3_weight", "layer4_1_bn3_bias", "var_1148", "layer4_2_bn1_running_mean", "layer4_2_bn1_running_var", "layer4_2_bn1_weight", "layer4_2_bn1_bias", "var_1154", "layer4_2_conv2_weight", "layer4_2_bn2_running_mean", "layer4_2_bn2_running_var", "layer4_2_bn2_weight", "layer4_2_bn2_bias", "layer4_2_bn3_running_mean", "layer4_2_bn3_running_var", "layer4_2_bn3_weight", "layer4_2_bn3_bias", "var_1169", "var_1170", "fc_bias", "fc_weight"], + "buffers": {"buf1": {"Absolute": 3211264}, "buf2": {"Absolute": 802816}, "buf4": {"Absolute": 802816}, "buf6": {"Absolute": 802816}, "buf10": {"Absolute": 
3211264}, "buf24": {"Absolute": 1605632}, "buf26": {"Absolute": 401408}, "buf30": {"Absolute": 1605632}, "buf34": {"Absolute": 401408}, "buf75": {"Absolute": 200704}, "buf83": {"Absolute": 200704}, "buf294": {"Absolute": 100352}, "buf302": {"Absolute": 100352}, "buf310": {"Absolute": 8192}, "buf312": {"Absolute": 4000}}, + "outputs": ["buf312"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg932_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf1"}, {"Ptr": "buf2"}, {"Ptr": "var_9"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf6"}, {"Ptr": "layer1_0_conv3_weight"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_downsample_0_weight"}, {"Ptr": "buf1"}, {"Ptr": "layer1_0_bn3_running_mean"}, {"Ptr": "layer1_0_bn3_running_var"}, {"Ptr": "layer1_0_bn3_weight"}, {"Ptr": "layer1_0_bn3_bias"}, {"Ptr": "layer1_0_downsample_1_running_mean"}, {"Ptr": "layer1_0_downsample_1_running_var"}, {"Ptr": "layer1_0_downsample_1_weight"}, {"Ptr": "layer1_0_downsample_1_bias"}, {"Ptr": "buf10"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf10"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf6"}, {"Ptr": "layer1_1_conv3_weight"}, {"Ptr": "layer1_1_bn3_running_mean"}, {"Ptr": "layer1_1_bn3_running_var"}, {"Ptr": "layer1_1_bn3_weight"}, {"Ptr": "layer1_1_bn3_bias"}, {"Ptr": "buf10"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf1"}, {"Ptr": "layer1_2_conv1_weight"}, {"Ptr": "layer1_2_bn1_running_mean"}, {"Ptr": "layer1_2_bn1_running_var"}, {"Ptr": "layer1_2_bn1_weight"}, {"Ptr": "layer1_2_bn1_bias"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf6"}, {"Ptr": "layer1_2_conv2_weight"}, {"Ptr": "layer1_2_bn2_running_mean"}, {"Ptr": "layer1_2_bn2_running_var"}, {"Ptr": "layer1_2_bn2_weight"}, {"Ptr": "layer1_2_bn2_bias"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf2"}, {"Ptr": "layer1_2_conv3_weight"}, {"Ptr": "layer1_2_bn3_running_mean"}, {"Ptr": "layer1_2_bn3_running_var"}, {"Ptr": "layer1_2_bn3_weight"}, {"Ptr": "layer1_2_bn3_bias"}, {"Ptr": "buf1"}, {"Ptr": "buf10"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf10"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf24"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf26"}, {"Ptr": "layer2_0_conv3_weight"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf10"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf24"}, {"Ptr": "layer2_0_bn3_running_mean"}, {"Ptr": "layer2_0_bn3_running_var"}, {"Ptr": "layer2_0_bn3_weight"}, {"Ptr": "layer2_0_bn3_bias"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": 
{"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf30"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf26"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer2_1_conv3_weight"}, {"Ptr": "layer2_1_bn3_running_mean"}, {"Ptr": "layer2_1_bn3_running_var"}, {"Ptr": "layer2_1_bn3_weight"}, {"Ptr": "layer2_1_bn3_bias"}, {"Ptr": "buf30"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf24"}, {"Ptr": "layer2_2_conv1_weight"}, {"Ptr": "layer2_2_bn1_running_mean"}, {"Ptr": "layer2_2_bn1_running_var"}, {"Ptr": "layer2_2_bn1_weight"}, {"Ptr": "layer2_2_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": 
{"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf34"}, {"Ptr": "layer2_2_conv2_weight"}, {"Ptr": "layer2_2_bn2_running_mean"}, {"Ptr": "layer2_2_bn2_running_var"}, {"Ptr": "layer2_2_bn2_weight"}, {"Ptr": "layer2_2_bn2_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf26"}, {"Ptr": "layer2_2_conv3_weight"}, {"Ptr": "layer2_2_bn3_running_mean"}, {"Ptr": "layer2_2_bn3_running_var"}, {"Ptr": "layer2_2_bn3_weight"}, {"Ptr": "layer2_2_bn3_bias"}, {"Ptr": "buf24"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf30"}, {"Ptr": "layer2_3_conv1_weight"}, {"Ptr": "layer2_3_bn1_running_mean"}, {"Ptr": "layer2_3_bn1_running_var"}, {"Ptr": "layer2_3_bn1_weight"}, {"Ptr": "layer2_3_bn1_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf26"}, {"Ptr": "layer2_3_conv2_weight"}, {"Ptr": "layer2_3_bn2_running_mean"}, {"Ptr": "layer2_3_bn2_running_var"}, {"Ptr": "layer2_3_bn2_weight"}, {"Ptr": "layer2_3_bn2_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": 
{"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer2_3_conv3_weight"}, {"Ptr": "layer2_3_bn3_running_mean"}, {"Ptr": "layer2_3_bn3_running_var"}, {"Ptr": "layer2_3_bn3_weight"}, {"Ptr": "layer2_3_bn3_bias"}, {"Ptr": "buf30"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf24"}, {"Ptr": "layer2_4_conv1_weight"}, {"Ptr": "layer2_4_bn1_running_mean"}, {"Ptr": "layer2_4_bn1_running_var"}, {"Ptr": "layer2_4_bn1_weight"}, {"Ptr": "layer2_4_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf34"}, {"Ptr": "layer2_4_conv2_weight"}, {"Ptr": "layer2_4_bn2_running_mean"}, {"Ptr": "layer2_4_bn2_running_var"}, {"Ptr": "layer2_4_bn2_weight"}, {"Ptr": "layer2_4_bn2_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf26"}, {"Ptr": "layer2_4_conv3_weight"}, {"Ptr": "layer2_4_bn3_running_mean"}, {"Ptr": "layer2_4_bn3_running_var"}, {"Ptr": "layer2_4_bn3_weight"}, {"Ptr": "layer2_4_bn3_bias"}, {"Ptr": "buf24"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 13}, + 
"grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf30"}, {"Ptr": "layer2_5_conv1_weight"}, {"Ptr": "layer2_5_bn1_running_mean"}, {"Ptr": "layer2_5_bn1_running_var"}, {"Ptr": "layer2_5_bn1_weight"}, {"Ptr": "layer2_5_bn1_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf26"}, {"Ptr": "layer2_5_conv2_weight"}, {"Ptr": "layer2_5_bn2_running_mean"}, {"Ptr": "layer2_5_bn2_running_var"}, {"Ptr": "layer2_5_bn2_weight"}, {"Ptr": "layer2_5_bn2_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer2_5_conv3_weight"}, {"Ptr": "layer2_5_bn3_running_mean"}, {"Ptr": "layer2_5_bn3_running_var"}, {"Ptr": "layer2_5_bn3_weight"}, {"Ptr": "layer2_5_bn3_bias"}, {"Ptr": "buf30"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf24"}, {"Ptr": "layer2_6_conv1_weight"}, {"Ptr": "layer2_6_bn1_running_mean"}, {"Ptr": "layer2_6_bn1_running_var"}, {"Ptr": "layer2_6_bn1_weight"}, {"Ptr": "layer2_6_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + 
"grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf34"}, {"Ptr": "layer2_6_conv2_weight"}, {"Ptr": "layer2_6_bn2_running_mean"}, {"Ptr": "layer2_6_bn2_running_var"}, {"Ptr": "layer2_6_bn2_weight"}, {"Ptr": "layer2_6_bn2_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf26"}, {"Ptr": "layer2_6_conv3_weight"}, {"Ptr": "layer2_6_bn3_running_mean"}, {"Ptr": "layer2_6_bn3_running_var"}, {"Ptr": "layer2_6_bn3_weight"}, {"Ptr": "layer2_6_bn3_bias"}, {"Ptr": "buf24"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf30"}, {"Ptr": "layer2_7_conv1_weight"}, {"Ptr": "layer2_7_bn1_running_mean"}, {"Ptr": "layer2_7_bn1_running_var"}, {"Ptr": "layer2_7_bn1_weight"}, {"Ptr": "layer2_7_bn1_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf26"}, {"Ptr": "layer2_7_conv2_weight"}, {"Ptr": "layer2_7_bn2_running_mean"}, {"Ptr": "layer2_7_bn2_running_var"}, {"Ptr": "layer2_7_bn2_weight"}, {"Ptr": "layer2_7_bn2_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + 
"grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer2_7_conv3_weight"}, {"Ptr": "layer2_7_bn3_running_mean"}, {"Ptr": "layer2_7_bn3_running_var"}, {"Ptr": "layer2_7_bn3_weight"}, {"Ptr": "layer2_7_bn3_bias"}, {"Ptr": "buf30"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern037", [{"Ptr": "buf24"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern038", [{"Ptr": "buf2"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "var_281"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern040", [{"Ptr": "buf75"}, {"Ptr": "layer3_0_conv3_weight"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern041", [{"Ptr": "buf24"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf2"}, {"Ptr": "layer3_0_bn3_running_mean"}, {"Ptr": "layer3_0_bn3_running_var"}, {"Ptr": "layer3_0_bn3_weight"}, {"Ptr": "layer3_0_bn3_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_302"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": 
"layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_311"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_1_conv3_weight"}, {"Ptr": "layer3_1_bn3_running_mean"}, {"Ptr": "layer3_1_bn3_running_var"}, {"Ptr": "layer3_1_bn3_weight"}, {"Ptr": "layer3_1_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_2_bn1_running_mean"}, {"Ptr": "layer3_2_bn1_running_var"}, {"Ptr": "layer3_2_bn1_weight"}, {"Ptr": "layer3_2_bn1_bias"}, {"Ptr": "var_325"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_2_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_2_bn2_running_mean"}, {"Ptr": "layer3_2_bn2_running_var"}, {"Ptr": "layer3_2_bn2_weight"}, {"Ptr": "layer3_2_bn2_bias"}, {"Ptr": "var_334"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_2_conv3_weight"}, {"Ptr": "layer3_2_bn3_running_mean"}, {"Ptr": "layer3_2_bn3_running_var"}, {"Ptr": "layer3_2_bn3_weight"}, {"Ptr": "layer3_2_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_3_bn1_running_mean"}, {"Ptr": "layer3_3_bn1_running_var"}, {"Ptr": "layer3_3_bn1_weight"}, {"Ptr": "layer3_3_bn1_bias"}, {"Ptr": "var_348"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_3_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_3_bn2_running_mean"}, {"Ptr": "layer3_3_bn2_running_var"}, {"Ptr": "layer3_3_bn2_weight"}, {"Ptr": "layer3_3_bn2_bias"}, {"Ptr": "var_357"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": 
"layer3_3_conv3_weight"}, {"Ptr": "layer3_3_bn3_running_mean"}, {"Ptr": "layer3_3_bn3_running_var"}, {"Ptr": "layer3_3_bn3_weight"}, {"Ptr": "layer3_3_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_4_bn1_running_mean"}, {"Ptr": "layer3_4_bn1_running_var"}, {"Ptr": "layer3_4_bn1_weight"}, {"Ptr": "layer3_4_bn1_bias"}, {"Ptr": "var_371"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_4_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_4_bn2_running_mean"}, {"Ptr": "layer3_4_bn2_running_var"}, {"Ptr": "layer3_4_bn2_weight"}, {"Ptr": "layer3_4_bn2_bias"}, {"Ptr": "var_380"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_4_conv3_weight"}, {"Ptr": "layer3_4_bn3_running_mean"}, {"Ptr": "layer3_4_bn3_running_var"}, {"Ptr": "layer3_4_bn3_weight"}, {"Ptr": "layer3_4_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": 
{"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_5_bn1_running_mean"}, {"Ptr": "layer3_5_bn1_running_var"}, {"Ptr": "layer3_5_bn1_weight"}, {"Ptr": "layer3_5_bn1_bias"}, {"Ptr": "var_394"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_5_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_5_bn2_running_mean"}, {"Ptr": "layer3_5_bn2_running_var"}, {"Ptr": "layer3_5_bn2_weight"}, {"Ptr": "layer3_5_bn2_bias"}, {"Ptr": "var_403"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_5_conv3_weight"}, {"Ptr": "layer3_5_bn3_running_mean"}, {"Ptr": "layer3_5_bn3_running_var"}, {"Ptr": "layer3_5_bn3_weight"}, {"Ptr": "layer3_5_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_6_bn1_running_mean"}, {"Ptr": "layer3_6_bn1_running_var"}, {"Ptr": "layer3_6_bn1_weight"}, {"Ptr": "layer3_6_bn1_bias"}, {"Ptr": "var_417"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_6_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_6_bn2_running_mean"}, {"Ptr": "layer3_6_bn2_running_var"}, {"Ptr": "layer3_6_bn2_weight"}, {"Ptr": "layer3_6_bn2_bias"}, {"Ptr": "var_426"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_6_conv3_weight"}, {"Ptr": "layer3_6_bn3_running_mean"}, {"Ptr": "layer3_6_bn3_running_var"}, {"Ptr": "layer3_6_bn3_weight"}, {"Ptr": "layer3_6_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_7_bn1_running_mean"}, {"Ptr": "layer3_7_bn1_running_var"}, {"Ptr": "layer3_7_bn1_weight"}, {"Ptr": "layer3_7_bn1_bias"}, {"Ptr": "var_440"}], { 
+ "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_7_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_7_bn2_running_mean"}, {"Ptr": "layer3_7_bn2_running_var"}, {"Ptr": "layer3_7_bn2_weight"}, {"Ptr": "layer3_7_bn2_bias"}, {"Ptr": "var_449"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_7_conv3_weight"}, {"Ptr": "layer3_7_bn3_running_mean"}, {"Ptr": "layer3_7_bn3_running_var"}, {"Ptr": "layer3_7_bn3_weight"}, {"Ptr": "layer3_7_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_8_bn1_running_mean"}, {"Ptr": "layer3_8_bn1_running_var"}, {"Ptr": "layer3_8_bn1_weight"}, {"Ptr": "layer3_8_bn1_bias"}, {"Ptr": "var_463"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_8_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_8_bn2_running_mean"}, {"Ptr": "layer3_8_bn2_running_var"}, {"Ptr": "layer3_8_bn2_weight"}, {"Ptr": "layer3_8_bn2_bias"}, {"Ptr": "var_472"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_8_conv3_weight"}, {"Ptr": "layer3_8_bn3_running_mean"}, {"Ptr": "layer3_8_bn3_running_var"}, {"Ptr": "layer3_8_bn3_weight"}, {"Ptr": "layer3_8_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_9_bn1_running_mean"}, {"Ptr": "layer3_9_bn1_running_var"}, {"Ptr": "layer3_9_bn1_weight"}, {"Ptr": "layer3_9_bn1_bias"}, {"Ptr": "var_486"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_9_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_9_bn2_running_mean"}, {"Ptr": "layer3_9_bn2_running_var"}, {"Ptr": "layer3_9_bn2_weight"}, {"Ptr": "layer3_9_bn2_bias"}, {"Ptr": "var_495"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_9_conv3_weight"}, {"Ptr": "layer3_9_bn3_running_mean"}, {"Ptr": "layer3_9_bn3_running_var"}, {"Ptr": "layer3_9_bn3_weight"}, {"Ptr": "layer3_9_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_10_bn1_running_mean"}, {"Ptr": "layer3_10_bn1_running_var"}, {"Ptr": "layer3_10_bn1_weight"}, {"Ptr": "layer3_10_bn1_bias"}, {"Ptr": "var_509"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_10_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", 
[{"Ptr": "buf75"}, {"Ptr": "layer3_10_bn2_running_mean"}, {"Ptr": "layer3_10_bn2_running_var"}, {"Ptr": "layer3_10_bn2_weight"}, {"Ptr": "layer3_10_bn2_bias"}, {"Ptr": "var_518"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_10_conv3_weight"}, {"Ptr": "layer3_10_bn3_running_mean"}, {"Ptr": "layer3_10_bn3_running_var"}, {"Ptr": "layer3_10_bn3_weight"}, {"Ptr": "layer3_10_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_11_bn1_running_mean"}, {"Ptr": "layer3_11_bn1_running_var"}, {"Ptr": "layer3_11_bn1_weight"}, {"Ptr": "layer3_11_bn1_bias"}, {"Ptr": "var_532"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_11_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_11_bn2_running_mean"}, {"Ptr": "layer3_11_bn2_running_var"}, {"Ptr": "layer3_11_bn2_weight"}, {"Ptr": "layer3_11_bn2_bias"}, {"Ptr": "var_541"}], { + "grid_dim_x": 
{"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_11_conv3_weight"}, {"Ptr": "layer3_11_bn3_running_mean"}, {"Ptr": "layer3_11_bn3_running_var"}, {"Ptr": "layer3_11_bn3_weight"}, {"Ptr": "layer3_11_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_12_bn1_running_mean"}, {"Ptr": "layer3_12_bn1_running_var"}, {"Ptr": "layer3_12_bn1_weight"}, {"Ptr": "layer3_12_bn1_bias"}, {"Ptr": "var_555"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_12_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_12_bn2_running_mean"}, {"Ptr": "layer3_12_bn2_running_var"}, {"Ptr": "layer3_12_bn2_weight"}, {"Ptr": "layer3_12_bn2_bias"}, {"Ptr": "var_564"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_12_conv3_weight"}, {"Ptr": "layer3_12_bn3_running_mean"}, {"Ptr": "layer3_12_bn3_running_var"}, {"Ptr": "layer3_12_bn3_weight"}, {"Ptr": "layer3_12_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_13_bn1_running_mean"}, {"Ptr": "layer3_13_bn1_running_var"}, {"Ptr": "layer3_13_bn1_weight"}, {"Ptr": "layer3_13_bn1_bias"}, {"Ptr": "var_578"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_13_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_13_bn2_running_mean"}, {"Ptr": "layer3_13_bn2_running_var"}, {"Ptr": "layer3_13_bn2_weight"}, {"Ptr": "layer3_13_bn2_bias"}, {"Ptr": "var_587"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_13_conv3_weight"}, {"Ptr": "layer3_13_bn3_running_mean"}, {"Ptr": "layer3_13_bn3_running_var"}, 
{"Ptr": "layer3_13_bn3_weight"}, {"Ptr": "layer3_13_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_14_bn1_running_mean"}, {"Ptr": "layer3_14_bn1_running_var"}, {"Ptr": "layer3_14_bn1_weight"}, {"Ptr": "layer3_14_bn1_bias"}, {"Ptr": "var_601"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_14_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_14_bn2_running_mean"}, {"Ptr": "layer3_14_bn2_running_var"}, {"Ptr": "layer3_14_bn2_weight"}, {"Ptr": "layer3_14_bn2_bias"}, {"Ptr": "var_610"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_14_conv3_weight"}, {"Ptr": "layer3_14_bn3_running_mean"}, {"Ptr": "layer3_14_bn3_running_var"}, {"Ptr": "layer3_14_bn3_weight"}, {"Ptr": "layer3_14_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_15_bn1_running_mean"}, {"Ptr": "layer3_15_bn1_running_var"}, {"Ptr": "layer3_15_bn1_weight"}, {"Ptr": "layer3_15_bn1_bias"}, {"Ptr": "var_624"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_15_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_15_bn2_running_mean"}, {"Ptr": "layer3_15_bn2_running_var"}, {"Ptr": "layer3_15_bn2_weight"}, {"Ptr": "layer3_15_bn2_bias"}, {"Ptr": "var_633"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_15_conv3_weight"}, {"Ptr": "layer3_15_bn3_running_mean"}, {"Ptr": "layer3_15_bn3_running_var"}, {"Ptr": "layer3_15_bn3_weight"}, {"Ptr": "layer3_15_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": 
"layer3_16_bn1_running_mean"}, {"Ptr": "layer3_16_bn1_running_var"}, {"Ptr": "layer3_16_bn1_weight"}, {"Ptr": "layer3_16_bn1_bias"}, {"Ptr": "var_647"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_16_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_16_bn2_running_mean"}, {"Ptr": "layer3_16_bn2_running_var"}, {"Ptr": "layer3_16_bn2_weight"}, {"Ptr": "layer3_16_bn2_bias"}, {"Ptr": "var_656"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_16_conv3_weight"}, {"Ptr": "layer3_16_bn3_running_mean"}, {"Ptr": "layer3_16_bn3_running_var"}, {"Ptr": "layer3_16_bn3_weight"}, {"Ptr": "layer3_16_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_17_bn1_running_mean"}, {"Ptr": "layer3_17_bn1_running_var"}, {"Ptr": "layer3_17_bn1_weight"}, {"Ptr": "layer3_17_bn1_bias"}, {"Ptr": "var_670"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_17_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_17_bn2_running_mean"}, {"Ptr": "layer3_17_bn2_running_var"}, {"Ptr": "layer3_17_bn2_weight"}, {"Ptr": "layer3_17_bn2_bias"}, {"Ptr": "var_679"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_17_conv3_weight"}, {"Ptr": "layer3_17_bn3_running_mean"}, {"Ptr": "layer3_17_bn3_running_var"}, {"Ptr": "layer3_17_bn3_weight"}, {"Ptr": "layer3_17_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_18_bn1_running_mean"}, {"Ptr": "layer3_18_bn1_running_var"}, {"Ptr": "layer3_18_bn1_weight"}, {"Ptr": "layer3_18_bn1_bias"}, {"Ptr": "var_693"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_18_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_18_bn2_running_mean"}, {"Ptr": "layer3_18_bn2_running_var"}, {"Ptr": "layer3_18_bn2_weight"}, {"Ptr": "layer3_18_bn2_bias"}, {"Ptr": "var_702"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_18_conv3_weight"}, {"Ptr": "layer3_18_bn3_running_mean"}, {"Ptr": "layer3_18_bn3_running_var"}, {"Ptr": "layer3_18_bn3_weight"}, {"Ptr": "layer3_18_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_19_bn1_running_mean"}, {"Ptr": "layer3_19_bn1_running_var"}, {"Ptr": "layer3_19_bn1_weight"}, {"Ptr": "layer3_19_bn1_bias"}, {"Ptr": "var_716"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_19_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + 
"block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_19_bn2_running_mean"}, {"Ptr": "layer3_19_bn2_running_var"}, {"Ptr": "layer3_19_bn2_weight"}, {"Ptr": "layer3_19_bn2_bias"}, {"Ptr": "var_725"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_19_conv3_weight"}, {"Ptr": "layer3_19_bn3_running_mean"}, {"Ptr": "layer3_19_bn3_running_var"}, {"Ptr": "layer3_19_bn3_weight"}, {"Ptr": "layer3_19_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_20_bn1_running_mean"}, {"Ptr": "layer3_20_bn1_running_var"}, {"Ptr": "layer3_20_bn1_weight"}, {"Ptr": "layer3_20_bn1_bias"}, {"Ptr": "var_739"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_20_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": 
"layer3_20_bn2_running_mean"}, {"Ptr": "layer3_20_bn2_running_var"}, {"Ptr": "layer3_20_bn2_weight"}, {"Ptr": "layer3_20_bn2_bias"}, {"Ptr": "var_748"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_20_conv3_weight"}, {"Ptr": "layer3_20_bn3_running_mean"}, {"Ptr": "layer3_20_bn3_running_var"}, {"Ptr": "layer3_20_bn3_weight"}, {"Ptr": "layer3_20_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_21_bn1_running_mean"}, {"Ptr": "layer3_21_bn1_running_var"}, {"Ptr": "layer3_21_bn1_weight"}, {"Ptr": "layer3_21_bn1_bias"}, {"Ptr": "var_762"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_21_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_21_bn2_running_mean"}, {"Ptr": "layer3_21_bn2_running_var"}, {"Ptr": "layer3_21_bn2_weight"}, {"Ptr": "layer3_21_bn2_bias"}, {"Ptr": "var_771"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_21_conv3_weight"}, {"Ptr": "layer3_21_bn3_running_mean"}, {"Ptr": "layer3_21_bn3_running_var"}, {"Ptr": "layer3_21_bn3_weight"}, {"Ptr": "layer3_21_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_22_bn1_running_mean"}, {"Ptr": "layer3_22_bn1_running_var"}, {"Ptr": "layer3_22_bn1_weight"}, {"Ptr": "layer3_22_bn1_bias"}, {"Ptr": "var_785"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_22_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_22_bn2_running_mean"}, {"Ptr": "layer3_22_bn2_running_var"}, {"Ptr": "layer3_22_bn2_weight"}, {"Ptr": "layer3_22_bn2_bias"}, {"Ptr": "var_794"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_22_conv3_weight"}, {"Ptr": "layer3_22_bn3_running_mean"}, {"Ptr": "layer3_22_bn3_running_var"}, {"Ptr": "layer3_22_bn3_weight"}, {"Ptr": "layer3_22_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_23_bn1_running_mean"}, {"Ptr": "layer3_23_bn1_running_var"}, {"Ptr": "layer3_23_bn1_weight"}, {"Ptr": "layer3_23_bn1_bias"}, {"Ptr": "var_808"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_23_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_23_bn2_running_mean"}, {"Ptr": "layer3_23_bn2_running_var"}, {"Ptr": "layer3_23_bn2_weight"}, {"Ptr": "layer3_23_bn2_bias"}, {"Ptr": "var_817"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_23_conv3_weight"}, {"Ptr": "layer3_23_bn3_running_mean"}, {"Ptr": "layer3_23_bn3_running_var"}, {"Ptr": "layer3_23_bn3_weight"}, {"Ptr": 
"layer3_23_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_24_bn1_running_mean"}, {"Ptr": "layer3_24_bn1_running_var"}, {"Ptr": "layer3_24_bn1_weight"}, {"Ptr": "layer3_24_bn1_bias"}, {"Ptr": "var_831"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_24_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_24_bn2_running_mean"}, {"Ptr": "layer3_24_bn2_running_var"}, {"Ptr": "layer3_24_bn2_weight"}, {"Ptr": "layer3_24_bn2_bias"}, {"Ptr": "var_840"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_24_conv3_weight"}, {"Ptr": "layer3_24_bn3_running_mean"}, {"Ptr": "layer3_24_bn3_running_var"}, {"Ptr": "layer3_24_bn3_weight"}, {"Ptr": "layer3_24_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_25_bn1_running_mean"}, {"Ptr": "layer3_25_bn1_running_var"}, {"Ptr": "layer3_25_bn1_weight"}, {"Ptr": "layer3_25_bn1_bias"}, {"Ptr": "var_854"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_25_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_25_bn2_running_mean"}, {"Ptr": "layer3_25_bn2_running_var"}, {"Ptr": "layer3_25_bn2_weight"}, {"Ptr": "layer3_25_bn2_bias"}, {"Ptr": "var_863"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_25_conv3_weight"}, {"Ptr": "layer3_25_bn3_running_mean"}, {"Ptr": "layer3_25_bn3_running_var"}, {"Ptr": "layer3_25_bn3_weight"}, {"Ptr": "layer3_25_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_26_bn1_running_mean"}, 
{"Ptr": "layer3_26_bn1_running_var"}, {"Ptr": "layer3_26_bn1_weight"}, {"Ptr": "layer3_26_bn1_bias"}, {"Ptr": "var_877"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_26_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_26_bn2_running_mean"}, {"Ptr": "layer3_26_bn2_running_var"}, {"Ptr": "layer3_26_bn2_weight"}, {"Ptr": "layer3_26_bn2_bias"}, {"Ptr": "var_886"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_26_conv3_weight"}, {"Ptr": "layer3_26_bn3_running_mean"}, {"Ptr": "layer3_26_bn3_running_var"}, {"Ptr": "layer3_26_bn3_weight"}, {"Ptr": "layer3_26_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_27_bn1_running_mean"}, {"Ptr": "layer3_27_bn1_running_var"}, {"Ptr": "layer3_27_bn1_weight"}, {"Ptr": "layer3_27_bn1_bias"}, {"Ptr": "var_900"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_27_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_27_bn2_running_mean"}, {"Ptr": "layer3_27_bn2_running_var"}, {"Ptr": "layer3_27_bn2_weight"}, {"Ptr": "layer3_27_bn2_bias"}, {"Ptr": "var_909"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_27_conv3_weight"}, {"Ptr": "layer3_27_bn3_running_mean"}, {"Ptr": "layer3_27_bn3_running_var"}, {"Ptr": "layer3_27_bn3_weight"}, {"Ptr": "layer3_27_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_28_bn1_running_mean"}, {"Ptr": "layer3_28_bn1_running_var"}, {"Ptr": "layer3_28_bn1_weight"}, {"Ptr": "layer3_28_bn1_bias"}, {"Ptr": "var_923"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": 
"buf83"}, {"Ptr": "layer3_28_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_28_bn2_running_mean"}, {"Ptr": "layer3_28_bn2_running_var"}, {"Ptr": "layer3_28_bn2_weight"}, {"Ptr": "layer3_28_bn2_bias"}, {"Ptr": "var_932"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_28_conv3_weight"}, {"Ptr": "layer3_28_bn3_running_mean"}, {"Ptr": "layer3_28_bn3_running_var"}, {"Ptr": "layer3_28_bn3_weight"}, {"Ptr": "layer3_28_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_29_bn1_running_mean"}, {"Ptr": "layer3_29_bn1_running_var"}, {"Ptr": "layer3_29_bn1_weight"}, {"Ptr": "layer3_29_bn1_bias"}, {"Ptr": "var_946"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_29_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_29_bn2_running_mean"}, {"Ptr": "layer3_29_bn2_running_var"}, {"Ptr": "layer3_29_bn2_weight"}, {"Ptr": "layer3_29_bn2_bias"}, {"Ptr": "var_955"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_29_conv3_weight"}, {"Ptr": "layer3_29_bn3_running_mean"}, {"Ptr": "layer3_29_bn3_running_var"}, {"Ptr": "layer3_29_bn3_weight"}, {"Ptr": "layer3_29_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_30_bn1_running_mean"}, {"Ptr": "layer3_30_bn1_running_var"}, {"Ptr": "layer3_30_bn1_weight"}, {"Ptr": "layer3_30_bn1_bias"}, {"Ptr": "var_969"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_30_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_30_bn2_running_mean"}, {"Ptr": 
"layer3_30_bn2_running_var"}, {"Ptr": "layer3_30_bn2_weight"}, {"Ptr": "layer3_30_bn2_bias"}, {"Ptr": "var_978"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_30_conv3_weight"}, {"Ptr": "layer3_30_bn3_running_mean"}, {"Ptr": "layer3_30_bn3_running_var"}, {"Ptr": "layer3_30_bn3_weight"}, {"Ptr": "layer3_30_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_31_bn1_running_mean"}, {"Ptr": "layer3_31_bn1_running_var"}, {"Ptr": "layer3_31_bn1_weight"}, {"Ptr": "layer3_31_bn1_bias"}, {"Ptr": "var_992"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_31_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_31_bn2_running_mean"}, {"Ptr": "layer3_31_bn2_running_var"}, {"Ptr": "layer3_31_bn2_weight"}, {"Ptr": "layer3_31_bn2_bias"}, {"Ptr": "var_1001"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_31_conv3_weight"}, {"Ptr": "layer3_31_bn3_running_mean"}, {"Ptr": "layer3_31_bn3_running_var"}, {"Ptr": "layer3_31_bn3_weight"}, {"Ptr": "layer3_31_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_32_bn1_running_mean"}, {"Ptr": "layer3_32_bn1_running_var"}, {"Ptr": "layer3_32_bn1_weight"}, {"Ptr": "layer3_32_bn1_bias"}, {"Ptr": "var_1015"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_32_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_32_bn2_running_mean"}, {"Ptr": "layer3_32_bn2_running_var"}, {"Ptr": "layer3_32_bn2_weight"}, {"Ptr": "layer3_32_bn2_bias"}, {"Ptr": "var_1024"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": 
"buf75"}, {"Ptr": "layer3_32_conv3_weight"}, {"Ptr": "layer3_32_bn3_running_mean"}, {"Ptr": "layer3_32_bn3_running_var"}, {"Ptr": "layer3_32_bn3_weight"}, {"Ptr": "layer3_32_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_33_bn1_running_mean"}, {"Ptr": "layer3_33_bn1_running_var"}, {"Ptr": "layer3_33_bn1_weight"}, {"Ptr": "layer3_33_bn1_bias"}, {"Ptr": "var_1038"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_33_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_33_bn2_running_mean"}, {"Ptr": "layer3_33_bn2_running_var"}, {"Ptr": "layer3_33_bn2_weight"}, {"Ptr": "layer3_33_bn2_bias"}, {"Ptr": "var_1047"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_33_conv3_weight"}, {"Ptr": "layer3_33_bn3_running_mean"}, {"Ptr": "layer3_33_bn3_running_var"}, {"Ptr": "layer3_33_bn3_weight"}, {"Ptr": "layer3_33_bn3_bias"}, {"Ptr": "buf6"}, 
{"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_34_bn1_running_mean"}, {"Ptr": "layer3_34_bn1_running_var"}, {"Ptr": "layer3_34_bn1_weight"}, {"Ptr": "layer3_34_bn1_bias"}, {"Ptr": "var_1061"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf83"}, {"Ptr": "layer3_34_conv2_weight"}, {"Ptr": "buf75"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_34_bn2_running_mean"}, {"Ptr": "layer3_34_bn2_running_var"}, {"Ptr": "layer3_34_bn2_weight"}, {"Ptr": "layer3_34_bn2_bias"}, {"Ptr": "var_1070"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf75"}, {"Ptr": "layer3_34_conv3_weight"}, {"Ptr": "layer3_34_bn3_running_mean"}, {"Ptr": "layer3_34_bn3_running_var"}, {"Ptr": "layer3_34_bn3_weight"}, {"Ptr": "layer3_34_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf75"}, {"Ptr": "layer3_35_bn1_running_mean"}, {"Ptr": "layer3_35_bn1_running_var"}, {"Ptr": "layer3_35_bn1_weight"}, {"Ptr": "layer3_35_bn1_bias"}, {"Ptr": "var_1084"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf75"}, {"Ptr": "layer3_35_conv2_weight"}, {"Ptr": "buf83"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern039", [{"Ptr": "buf83"}, {"Ptr": "layer3_35_bn2_running_mean"}, {"Ptr": "layer3_35_bn2_running_var"}, {"Ptr": "layer3_35_bn2_weight"}, {"Ptr": "layer3_35_bn2_bias"}, {"Ptr": "var_1093"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf83"}, {"Ptr": "layer3_35_conv3_weight"}, {"Ptr": "layer3_35_bn3_running_mean"}, {"Ptr": "layer3_35_bn3_running_var"}, {"Ptr": "layer3_35_bn3_weight"}, {"Ptr": "layer3_35_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern182", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": 
"layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "var_1107"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern183", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "buf294"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern184", [{"Ptr": "buf2"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern185", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_bn3_running_mean"}, {"Ptr": "layer4_0_bn3_running_var"}, {"Ptr": "layer4_0_bn3_weight"}, {"Ptr": "layer4_0_bn3_bias"}, {"Ptr": "buf26"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "var_1128"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern186", [{"Ptr": "buf294"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": 
"layer4_1_bn1_bias"}, {"Ptr": "var_1134"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern187", [{"Ptr": "buf294"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf302"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern188", [{"Ptr": "buf34"}, {"Ptr": "buf26"}, {"Ptr": "layer4_1_bn3_running_mean"}, {"Ptr": "layer4_1_bn3_running_var"}, {"Ptr": "layer4_1_bn3_weight"}, {"Ptr": "layer4_1_bn3_bias"}, {"Ptr": "var_1148"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern186", [{"Ptr": "buf302"}, {"Ptr": "layer4_2_bn1_running_mean"}, {"Ptr": "layer4_2_bn1_running_var"}, {"Ptr": "layer4_2_bn1_weight"}, {"Ptr": "layer4_2_bn1_bias"}, {"Ptr": "var_1154"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern187", [{"Ptr": "buf302"}, {"Ptr": "layer4_2_conv2_weight"}, {"Ptr": "layer4_2_bn2_running_mean"}, {"Ptr": "layer4_2_bn2_running_var"}, {"Ptr": "layer4_2_bn2_weight"}, {"Ptr": "layer4_2_bn2_bias"}, {"Ptr": "buf294"}], { + "grid_dim_x": {"Absolute": 
1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern191", [{"Ptr": "buf310"}, {"Ptr": "buf26"}, {"Ptr": "layer4_2_bn3_running_mean"}, {"Ptr": "layer4_2_bn3_running_var"}, {"Ptr": "layer4_2_bn3_weight"}, {"Ptr": "layer4_2_bn3_bias"}, {"Ptr": "buf34"}, {"Ptr": "var_1169"}, {"Ptr": "var_1170"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern192", [{"Ptr": "fc_bias"}, {"Ptr": "buf310"}, {"Ptr": "fc_weight"}, {"Ptr": "buf312"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet18.json b/machine_interface/tests/data/hip/test_gpu_resnet18.json new file mode 100644 index 00000000..3c222c9b --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet18.json @@ -0,0 +1,289 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet18/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco", "path": 
"hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco", "path": "hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet18/poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet18/poi_fused_add_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_16.hsaco", "path": "hip/resnet18/tem_fused_conv2d_16.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_17.hsaco", "path": 
"hip/resnet18/poi_fused_add_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet18/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_linear_20.hsaco", "path": "hip/resnet18/tem_fused_linear_20.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern007"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco", "kernel_name": "kern008"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern010"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern011"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern013"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern015"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern017"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern018"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_conv2d_16.hsaco", "kernel_name": "kern020"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern021"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern022"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern024"}, + {"module_name": "tem_fused_linear_20.hsaco", "kernel_name": "kern025"} + ], + "blueprint": { + "inputs": ["arg122_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_9", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", 
"layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_107", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_117", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "var_141", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_159", "var_160", "fc_bias", "fc_weight"], + "buffers": {"buf1": {"Absolute": 3211264}, "buf2": {"Absolute": 802816}, "buf4": {"Absolute": 802816}, "buf6": {"Absolute": 802816}, "buf12": {"Absolute": 401408}, "buf13": {"Absolute": 401408}, "buf20": {"Absolute": 401408}, "buf22": {"Absolute": 200704}, "buf23": {"Absolute": 200704}, "buf29": {"Absolute": 200704}, "buf32": {"Absolute": 100352}, "buf33": {"Absolute": 100352}, "buf39": {"Absolute": 100352}, "buf40": {"Absolute": 2048}, "buf42": {"Absolute": 4000}}, + "outputs": ["buf42"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg122_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "buf1"}], { + 
"grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf1"}, {"Ptr": "buf2"}, {"Ptr": "var_9"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf6"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern007", [{"Ptr": "buf12"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf13"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf12"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf13"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "buf12"}, {"Ptr": "buf20"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf20"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf22"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + 
{"ExecKernel": ["kern013", [{"Ptr": "buf20"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf23"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf22"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_107"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf29"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf22"}, {"Ptr": "buf29"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, 
{"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_117"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf22"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf32"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4096} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf22"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf33"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "buf32"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "var_141"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf32"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4096} + }]}, + {"ExecKernel": ["kern024", [{"Ptr": "buf40"}, {"Ptr": "buf39"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf33"}, {"Ptr": "var_159"}, {"Ptr": "var_160"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "fc_bias"}, {"Ptr": "buf40"}, {"Ptr": "fc_weight"}, {"Ptr": "buf42"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git 
a/machine_interface/tests/data/hip/test_gpu_resnet18batch16.json b/machine_interface/tests/data/hip/test_gpu_resnet18batch16.json new file mode 100644 index 00000000..6997f253 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet18batch16.json @@ -0,0 +1,374 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_0.hsaco"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet18batch16/poi_fused_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet18batch16/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_3.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_3.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_8.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_8.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "tem_fused_conv2d_12.hsaco", "path": 
"hip/resnet18batch16/tem_fused_conv2d_12.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet18batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_17.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet18batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet18batch16/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "tem_fused_linear_22.hsaco", "path": "hip/resnet18batch16/tem_fused_linear_22.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": 
"poi_fused_add_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "kernel_name": "kern011"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_8.hsaco", "kernel_name": "kern013"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern014"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern015"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern017"}, + {"module_name": "tem_fused_conv2d_12.hsaco", "kernel_name": "kern018"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern020"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern021"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern025"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "kernel_name": "kern026"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern027"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern028"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern029"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern033"}, + {"module_name": "tem_fused_linear_22.hsaco", "kernel_name": "kern034"} + ], + "blueprint": { + "inputs": ["arg122_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_8", "var_11", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "var_20", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", 
"layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "var_30", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "var_39", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "var_49", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "var_73", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "var_90", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "var_99", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_123", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_133", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "var_142", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", 
"layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "var_166", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_177", "var_178", "fc_bias", "fc_weight"], + "buffers": {"buf0": {"Absolute": 51380224}, "buf2": {"Absolute": 12845056}, "buf3": {"Absolute": 12845056}, "buf5": {"Absolute": 12845056}, "buf12": {"Absolute": 6422528}, "buf13": {"Absolute": 6422528}, "buf19": {"Absolute": 6422528}, "buf21": {"Absolute": 3211264}, "buf23": {"Absolute": 3211264}, "buf29": {"Absolute": 3211264}, "buf31": {"Absolute": 1605632}, "buf33": {"Absolute": 1605632}, "buf39": {"Absolute": 1605632}, "buf40": {"Absolute": 32768}, "buf42": {"Absolute": 64000}}, + "outputs": ["buf42"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg122_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "var_8"}], { + "grid_dim_x": {"Absolute": 12544}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf0"}, {"Ptr": "buf2"}, {"Ptr": "var_11"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf3"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "var_20"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf3"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf5"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "var_30"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "var_39"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf3"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "var_49"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf12"}, 
{"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf13"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf12"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "var_73"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf13"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf12"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "buf19"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf13"}, {"Ptr": "buf19"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "var_90"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "buf21"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf21"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "var_99"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf21"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf23"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, 
{"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf21"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf21"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_123"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf29"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf21"}, {"Ptr": "buf29"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_133"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern026", [{"Ptr": "buf21"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "buf31"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf31"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "var_142"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf31"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf21"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf33"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "buf31"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 17408} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf31"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "var_166"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf39"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf40"}, {"Ptr": "buf39"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf31"}, {"Ptr": "var_177"}, {"Ptr": "var_178"}], { + "grid_dim_x": {"Absolute": 1024}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern034", [{"Ptr": "fc_bias"}, {"Ptr": "buf40"}, {"Ptr": "fc_weight"}, {"Ptr": "buf42"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + 
"block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet18batch2.json b/machine_interface/tests/data/hip/test_gpu_resnet18batch2.json new file mode 100644 index 00000000..69cdd594 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet18batch2.json @@ -0,0 +1,309 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet18batch2/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco", "path": "hip/resnet18batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_6.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": 
"hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet18batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet18batch2/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_17.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet18batch2/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "tem_fused_linear_21.hsaco", "path": "hip/resnet18batch2/tem_fused_linear_21.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": 
"tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern007"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "kernel_name": "kern008"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern010"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern014"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern015"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern017"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern020"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern021"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "kernel_name": "kern022"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern023"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern024"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern026"}, + {"module_name": "tem_fused_linear_21.hsaco", "kernel_name": "kern027"} + ], + "blueprint": { + "inputs": ["arg122_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_9", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", 
"layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "var_63", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "var_80", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_111", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_121", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", 
"layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "var_145", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_163", "var_164", "fc_bias", "fc_weight"], + "buffers": {"buf1": {"Absolute": 6422528}, "buf2": {"Absolute": 1605632}, "buf4": {"Absolute": 1605632}, "buf6": {"Absolute": 1605632}, "buf12": {"Absolute": 802816}, "buf13": {"Absolute": 802816}, "buf19": {"Absolute": 802816}, "buf22": {"Absolute": 401408}, "buf23": {"Absolute": 401408}, "buf29": {"Absolute": 401408}, "buf32": {"Absolute": 200704}, "buf33": {"Absolute": 200704}, "buf39": {"Absolute": 200704}, "buf40": {"Absolute": 4096}, "buf42": {"Absolute": 8000}}, + "outputs": ["buf42"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg122_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf1"}, {"Ptr": "buf2"}, {"Ptr": "var_9"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": 
"layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf6"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": 
"layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern007", [{"Ptr": "buf12"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf13"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf12"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "var_63"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf13"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 
25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern007", [{"Ptr": "buf12"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "buf19"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf13"}, {"Ptr": "buf19"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "var_80"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf22"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": 
"buf23"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf22"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_111"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf29"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf22"}, {"Ptr": "buf29"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_121"}], { + "grid_dim_x": 
{"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf22"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf32"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf22"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf33"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "buf32"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "var_145"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern024", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 10240} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf32"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf39"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern026", [{"Ptr": "buf40"}, {"Ptr": "buf39"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf33"}, {"Ptr": "var_163"}, {"Ptr": "var_164"}], { + "grid_dim_x": {"Absolute": 1024}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "fc_bias"}, {"Ptr": "buf40"}, {"Ptr": "fc_weight"}, {"Ptr": "buf42"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet18batch32.json 
b/machine_interface/tests/data/hip/test_gpu_resnet18batch32.json new file mode 100644 index 00000000..70ab1616 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet18batch32.json @@ -0,0 +1,367 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_0.hsaco"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet18batch32/poi_fused_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet18batch32/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_3.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_3.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet18batch32/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_6.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet18batch32/poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "tem_fused_conv2d_9.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_9.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_conv2d_14.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_14.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_17.hsaco", "path": "hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet18batch32/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco", "path": "hip/resnet18batch32/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco"}, + {"module_name": "tem_fused_linear_23.hsaco", "path": "hip/resnet18batch32/tem_fused_linear_23.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_3.hsaco", "kernel_name": "kern003"}, + {"module_name": 
"poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "kernel_name": "kern011"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_conv2d_9.hsaco", "kernel_name": "kern014"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern015"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern020"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern021"}, + {"module_name": "tem_fused_conv2d_14.hsaco", "kernel_name": "kern022"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern023"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern024"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern026"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern027"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern028"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern029"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern030"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco", "kernel_name": "kern032"}, + {"module_name": "tem_fused_linear_23.hsaco", "kernel_name": "kern033"} + ], + "blueprint": { + "inputs": ["arg122_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_8", "var_11", 
"layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "var_20", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "var_30", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "var_39", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "var_49", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "var_58", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "var_75", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "var_84", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "var_94", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "var_118", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_135", "layer4_0_conv1_weight", 
"layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_175", "var_176", "fc_bias", "fc_weight"], + "buffers": {"buf0": {"Absolute": 102760448}, "buf2": {"Absolute": 25690112}, "buf3": {"Absolute": 25690112}, "buf5": {"Absolute": 25690112}, "buf11": {"Absolute": 12845056}, "buf13": {"Absolute": 12845056}, "buf19": {"Absolute": 12845056}, "buf22": {"Absolute": 6422528}, "buf23": {"Absolute": 6422528}, "buf29": {"Absolute": 6422528}, "buf32": {"Absolute": 3211264}, "buf33": {"Absolute": 3211264}, "buf39": {"Absolute": 3211264}, "buf40": {"Absolute": 65536}, "buf42": {"Absolute": 128000}}, + "outputs": ["buf42"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg122_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "var_8"}], { + "grid_dim_x": {"Absolute": 25088}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} 
+ }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf0"}, {"Ptr": "buf2"}, {"Ptr": "var_11"}], { + "grid_dim_x": {"Absolute": 12544}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf3"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "var_20"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf3"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf5"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "var_30"}], { + "grid_dim_x": {"Absolute": 12544}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + 
{"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "var_39"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf3"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "var_49"}], { + "grid_dim_x": {"Absolute": 12544}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "buf11"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + 
{"ExecKernel": ["kern012", [{"Ptr": "buf11"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "var_58"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf11"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf11"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf13"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf11"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "var_75"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf13"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "buf11"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 2}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf11"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "var_84"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf11"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "buf19"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf13"}, {"Ptr": "buf19"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "var_94"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern021", 
[{"Ptr": "buf22"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf23"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "buf22"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "var_118"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern024", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 34816} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf22"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf29"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 
4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern026", [{"Ptr": "buf23"}, {"Ptr": "buf29"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_135"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf23"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 34816} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf32"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf23"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf33"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 25}, + 
"grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern030", [{"Ptr": "buf32"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf39"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern032", [{"Ptr": "buf40"}, {"Ptr": "buf39"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf32"}, {"Ptr": "var_175"}, {"Ptr": "var_176"}], { + "grid_dim_x": {"Absolute": 2048}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "fc_bias"}, {"Ptr": "buf40"}, {"Ptr": "fc_weight"}, {"Ptr": "buf42"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]} + ] + } +} \ No newline at end of file diff --git 
a/machine_interface/tests/data/hip/test_gpu_resnet18batch4.json b/machine_interface/tests/data/hip/test_gpu_resnet18batch4.json new file mode 100644 index 00000000..29b46ca8 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet18batch4.json @@ -0,0 +1,289 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_0.hsaco"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet18batch4/poi_fused_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet18batch4/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_6.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco"}, + 
{"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet18batch4/poi_fused_add_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco", "path": "hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet18batch4/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet18batch4/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_linear_20.hsaco", "path": "hip/resnet18batch4/tem_fused_linear_20.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern007"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "kernel_name": "kern008"}, + 
{"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern010"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern011"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern014"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern015"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern017"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern018"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern020"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern022"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern024"}, + {"module_name": "tem_fused_linear_20.hsaco", "kernel_name": "kern025"} + ], + "blueprint": { + "inputs": ["arg122_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_8", "var_11", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", 
"layer1_1_bn2_bias", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_downsample_0_weight", "layer2_0_conv2_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_117", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "var_148", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", 
"layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_159", "var_160", "fc_bias", "fc_weight"], + "buffers": {"buf0": {"Absolute": 12845056}, "buf2": {"Absolute": 3211264}, "buf4": {"Absolute": 3211264}, "buf6": {"Absolute": 3211264}, "buf12": {"Absolute": 1605632}, "buf14": {"Absolute": 1605632}, "buf16": {"Absolute": 1605632}, "buf22": {"Absolute": 802816}, "buf23": {"Absolute": 802816}, "buf29": {"Absolute": 802816}, "buf32": {"Absolute": 401408}, "buf33": {"Absolute": 401408}, "buf39": {"Absolute": 401408}, "buf40": {"Absolute": 8192}, "buf42": {"Absolute": 16000}}, + "outputs": ["buf42"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg122_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "var_8"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf0"}, {"Ptr": "buf2"}, {"Ptr": "var_11"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + 
"grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 33024} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf6"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf4"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 33024} + }]}, + {"ExecKernel": ["kern007", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf12"}], { + 
"grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf14"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf12"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf14"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf16"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf14"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf14"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": 
"layer2_1_bn2_bias"}, {"Ptr": "buf16"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf12"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf22"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf12"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf23"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf22"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": 
"layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf29"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf22"}, {"Ptr": "buf29"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_117"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf22"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf32"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf22"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf33"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf32"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "var_148"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf39"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern024", [{"Ptr": "buf40"}, {"Ptr": "buf39"}, {"Ptr": 
"layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf32"}, {"Ptr": "var_159"}, {"Ptr": "var_160"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "fc_bias"}, {"Ptr": "buf40"}, {"Ptr": "fc_weight"}, {"Ptr": "buf42"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet18batch64.json b/machine_interface/tests/data/hip/test_gpu_resnet18batch64.json new file mode 100644 index 00000000..fad13f8b --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet18batch64.json @@ -0,0 +1,396 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_0.hsaco"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet18batch64/poi_fused_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet18batch64/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_3.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_3.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "path": 
"hip/resnet18batch64/tem_fused_conv2d_6.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "tem_fused_conv2d_9.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_9.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "tem_fused_conv2d_12.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_12.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_15.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_15.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_17.hsaco", "path": "hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_conv2d_20.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_20.hsaco"}, + {"module_name": 
"poi_fused_add_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_22.hsaco", "path": "hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_22.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco", "path": "hip/resnet18batch64/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco"}, + {"module_name": "tem_fused_linear_24.hsaco", "path": "hip/resnet18batch64/tem_fused_linear_24.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "kernel_name": "kern011"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_conv2d_9.hsaco", "kernel_name": "kern014"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern015"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_conv2d_12.hsaco", "kernel_name": "kern020"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern021"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern022"}, + {"module_name": "tem_fused_conv2d_15.hsaco", "kernel_name": "kern023"}, + {"module_name": 
"poi_fused_add_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern024"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern028"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern029"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern030"}, + {"module_name": "tem_fused_conv2d_20.hsaco", "kernel_name": "kern031"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern032"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_22.hsaco", "kernel_name": "kern033"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco", "kernel_name": "kern035"}, + {"module_name": "tem_fused_linear_24.hsaco", "kernel_name": "kern036"} + ], + "blueprint": { + "inputs": ["arg122_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_8", "var_11", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "var_20", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "var_30", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "var_39", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "var_49", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "var_58", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "var_75", "layer2_1_conv1_weight", 
"layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "var_84", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "var_94", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "var_103", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "var_120", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_129", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_139", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "var_163", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_181", "var_182", "fc_bias", "fc_weight"], + "buffers": {"buf0": {"Absolute": 205520896}, "buf2": {"Absolute": 51380224}, "buf3": {"Absolute": 51380224}, "buf5": {"Absolute": 51380224}, "buf11": {"Absolute": 25690112}, "buf13": {"Absolute": 25690112}, "buf19": {"Absolute": 25690112}, "buf21": {"Absolute": 12845056}, "buf23": {"Absolute": 
12845056}, "buf29": {"Absolute": 12845056}, "buf32": {"Absolute": 6422528}, "buf33": {"Absolute": 6422528}, "buf39": {"Absolute": 6422528}, "buf40": {"Absolute": 131072}, "buf42": {"Absolute": 256000}}, + "outputs": ["buf42"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg122_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "var_8"}], { + "grid_dim_x": {"Absolute": 50176}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf0"}, {"Ptr": "buf2"}, {"Ptr": "var_11"}], { + "grid_dim_x": {"Absolute": 25088}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf3"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "var_20"}], { + "grid_dim_x": {"Absolute": 12544}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf3"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf5"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "var_30"}], { + "grid_dim_x": {"Absolute": 25088}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "var_39"}], { + "grid_dim_x": {"Absolute": 12544}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf3"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "var_49"}], { + "grid_dim_x": {"Absolute": 25088}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "buf11"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf11"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "var_58"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf11"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf11"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf13"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf11"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "var_75"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf13"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "buf11"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf11"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "var_84"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf11"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "buf19"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 32768} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf13"}, {"Ptr": "buf19"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "var_94"}], { + "grid_dim_x": {"Absolute": 12544}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "buf21"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf21"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "var_103"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf21"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf21"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern024", [{"Ptr": "buf23"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "buf21"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "var_120"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "buf21"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf21"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_129"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf21"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf29"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf23"}, {"Ptr": "buf29"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": 
"layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_139"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf23"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern030", [{"Ptr": "buf32"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf23"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern032", [{"Ptr": "buf33"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "buf32"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "var_163"}], { + "grid_dim_x": 
{"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "buf32"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 34816} + }]}, + {"ExecKernel": ["kern030", [{"Ptr": "buf32"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf39"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern035", [{"Ptr": "buf40"}, {"Ptr": "buf39"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf33"}, {"Ptr": "var_181"}, {"Ptr": "var_182"}], { + "grid_dim_x": {"Absolute": 1024}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 128} + }]}, + {"ExecKernel": ["kern036", [{"Ptr": "fc_bias"}, {"Ptr": "buf40"}, {"Ptr": "fc_weight"}, {"Ptr": "buf42"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 12288} + }]} + ] + } +} \ No newline at end of 
file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet18batch8.json b/machine_interface/tests/data/hip/test_gpu_resnet18batch8.json new file mode 100644 index 00000000..9492a2cd --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet18batch8.json @@ -0,0 +1,347 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_0.hsaco"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet18batch8/poi_fused_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet18batch8/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_8.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_8.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet18batch8/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet18batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_17.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet18batch8/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet18batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet18batch8/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "tem_fused_linear_22.hsaco", "path": "hip/resnet18batch8/tem_fused_linear_22.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern005"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern010"}, + {"module_name": "tem_fused_conv2d_8.hsaco", "kernel_name": "kern011"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern012"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern014"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern016"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern017"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern018"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern020"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern022"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "kernel_name": "kern023"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern024"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern025"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern026"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern030"}, + {"module_name": "tem_fused_linear_22.hsaco", "kernel_name": "kern031"} + ], + "blueprint": { + "inputs": ["arg122_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_8", "var_11", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", 
"layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "var_28", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "var_45", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "var_69", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "var_78", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "var_88", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_127", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "var_136", "layer4_0_conv2_weight", 
"layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "var_160", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_171", "var_172", "fc_bias", "fc_weight"], + "buffers": {"buf0": {"Absolute": 25690112}, "buf2": {"Absolute": 6422528}, "buf4": {"Absolute": 6422528}, "buf5": {"Absolute": 6422528}, "buf12": {"Absolute": 3211264}, "buf13": {"Absolute": 3211264}, "buf19": {"Absolute": 3211264}, "buf22": {"Absolute": 1605632}, "buf23": {"Absolute": 1605632}, "buf29": {"Absolute": 1605632}, "buf31": {"Absolute": 802816}, "buf33": {"Absolute": 802816}, "buf39": {"Absolute": 802816}, "buf40": {"Absolute": 16384}, "buf42": {"Absolute": 32000}}, + "outputs": ["buf42"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg122_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "var_8"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf0"}, {"Ptr": "buf2"}, {"Ptr": "var_11"}], { + "grid_dim_x": {"Absolute": 3136}, 
+ "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": "buf2"}, {"Ptr": "buf5"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "var_28"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": "buf2"}, {"Ptr": "buf4"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "var_45"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf12"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf13"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf12"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "var_69"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf13"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf12"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "var_78"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf12"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "buf19"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf13"}, {"Ptr": "buf19"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": 
"layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "var_88"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf22"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf13"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf23"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf22"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": 
"layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf23"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf29"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf22"}, {"Ptr": "buf29"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_127"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf22"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "buf31"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern024", [{"Ptr": "buf31"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "var_136"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 
1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf31"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern026", [{"Ptr": "buf22"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf33"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "buf31"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf31"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "buf33"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern024", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "var_160"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf33"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": 
"buf39"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern030", [{"Ptr": "buf40"}, {"Ptr": "buf39"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf31"}, {"Ptr": "var_171"}, {"Ptr": "var_172"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "fc_bias"}, {"Ptr": "buf40"}, {"Ptr": "fc_weight"}, {"Ptr": "buf42"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet18onnx.json b/machine_interface/tests/data/hip/test_gpu_resnet18onnx.json new file mode 100644 index 00000000..5c5d0d5b --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet18onnx.json @@ -0,0 +1,280 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_relu_0.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_0.hsaco"}, + {"module_name": "poi_fused_conv2d_max_pool2d_relu_1.hsaco", "path": "hip/resnet18onnx/poi_fused_conv2d_max_pool2d_relu_1.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_2.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_2.hsaco"}, + {"module_name": "tem_fused_add_conv2d_relu_3.hsaco", "path": "hip/resnet18onnx/tem_fused_add_conv2d_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_4.hsaco", 
"path": "hip/resnet18onnx/tem_fused_conv2d_relu_4.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_5.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_5.hsaco"}, + {"module_name": "tem_fused_add_conv2d_relu_6.hsaco", "path": "hip/resnet18onnx/tem_fused_add_conv2d_relu_6.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_7.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_7.hsaco"}, + {"module_name": "tem_fused_add_conv2d_relu_8.hsaco", "path": "hip/resnet18onnx/tem_fused_add_conv2d_relu_8.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_9.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_10.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_10.hsaco"}, + {"module_name": "tem_fused_add_conv2d_relu_11.hsaco", "path": "hip/resnet18onnx/tem_fused_add_conv2d_relu_11.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_12.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_12.hsaco"}, + {"module_name": "poi_fused_add_conv2d_relu_13.hsaco", "path": "hip/resnet18onnx/poi_fused_add_conv2d_relu_13.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_14.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_15.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_16.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_16.hsaco"}, + {"module_name": "poi_fused_add_conv2d_relu_17.hsaco", "path": "hip/resnet18onnx/poi_fused_add_conv2d_relu_17.hsaco"}, + {"module_name": "tem_fused_conv2d_relu_18.hsaco", "path": "hip/resnet18onnx/tem_fused_conv2d_relu_18.hsaco"}, + {"module_name": "per_fused_add_conv2d_mean_relu_19.hsaco", "path": "hip/resnet18onnx/per_fused_add_conv2d_mean_relu_19.hsaco"}, + {"module_name": "tem_fused_linear_20.hsaco", "path": "hip/resnet18onnx/tem_fused_linear_20.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_relu_0.hsaco", "kernel_name": "kern000"}, + {"module_name": 
"poi_fused_conv2d_max_pool2d_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "tem_fused_conv2d_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_add_conv2d_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_relu_4.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_conv2d_relu_5.hsaco", "kernel_name": "kern007"}, + {"module_name": "tem_fused_add_conv2d_relu_6.hsaco", "kernel_name": "kern008"}, + {"module_name": "tem_fused_conv2d_relu_7.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_add_conv2d_relu_8.hsaco", "kernel_name": "kern010"}, + {"module_name": "tem_fused_conv2d_relu_9.hsaco", "kernel_name": "kern011"}, + {"module_name": "tem_fused_conv2d_relu_10.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_add_conv2d_relu_11.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_conv2d_relu_12.hsaco", "kernel_name": "kern014"}, + {"module_name": "poi_fused_add_conv2d_relu_13.hsaco", "kernel_name": "kern016"}, + {"module_name": "tem_fused_conv2d_relu_14.hsaco", "kernel_name": "kern017"}, + {"module_name": "tem_fused_conv2d_relu_15.hsaco", "kernel_name": "kern018"}, + {"module_name": "tem_fused_conv2d_16.hsaco", "kernel_name": "kern019"}, + {"module_name": "poi_fused_add_conv2d_relu_17.hsaco", "kernel_name": "kern020"}, + {"module_name": "tem_fused_conv2d_relu_18.hsaco", "kernel_name": "kern021"}, + {"module_name": "per_fused_add_conv2d_mean_relu_19.hsaco", "kernel_name": "kern023"}, + {"module_name": "tem_fused_linear_20.hsaco", "kernel_name": "kern024"} + ], + "blueprint": { + "inputs": ["arg42_1", "conv1_Conv_weight", "conv1_Conv_bias", "var_6", "layer1_layer1_0_conv1_Conv_weight", "layer1_layer1_0_conv1_Conv_bias", "layer1_layer1_0_conv2_Conv_weight", "layer1_layer1_0_conv2_Conv_bias", "layer1_layer1_1_conv1_Conv_weight", "layer1_layer1_1_conv1_Conv_bias", "layer1_layer1_1_conv2_Conv_weight", "layer1_layer1_1_conv2_Conv_bias", 
"layer2_layer2_0_conv1_Conv_weight", "layer2_layer2_0_conv1_Conv_bias", "layer2_layer2_0_conv2_Conv_weight", "layer2_layer2_0_downsample_downsample_0_Conv_weight", "layer2_layer2_0_conv2_Conv_bias", "layer2_layer2_0_downsample_downsample_0_Conv_bias", "layer2_layer2_1_conv1_Conv_weight", "layer2_layer2_1_conv1_Conv_bias", "layer2_layer2_1_conv2_Conv_weight", "layer2_layer2_1_conv2_Conv_bias", "layer3_layer3_0_conv1_Conv_weight", "layer3_layer3_0_conv1_Conv_bias", "layer3_layer3_0_conv2_Conv_weight", "layer3_layer3_0_downsample_downsample_0_Conv_weight", "layer3_layer3_0_conv2_Conv_bias", "layer3_layer3_0_downsample_downsample_0_Conv_bias", "layer3_layer3_1_conv1_Conv_weight", "layer3_layer3_1_conv1_Conv_bias", "layer3_layer3_1_conv2_Conv_weight", "layer3_layer3_1_conv2_Conv_bias", "var_70", "layer4_layer4_0_conv1_Conv_weight", "layer4_layer4_0_conv1_Conv_bias", "layer4_layer4_0_conv2_Conv_weight", "layer4_layer4_0_downsample_downsample_0_Conv_weight", "layer4_layer4_0_conv2_Conv_bias", "layer4_layer4_0_downsample_downsample_0_Conv_bias", "var_85", "layer4_layer4_1_conv1_Conv_weight", "layer4_layer4_1_conv1_Conv_bias", "layer4_layer4_1_conv2_Conv_weight", "layer4_layer4_1_conv2_Conv_bias", "var_97", "var_98", "fc_Gemm_bias", "fc_Gemm_weight"], + "buffers": {"buf1": {"Absolute": 3211264}, "buf2": {"Absolute": 802816}, "buf4": {"Absolute": 802816}, "buf6": {"Absolute": 802816}, "buf12": {"Absolute": 401408}, "buf13": {"Absolute": 401408}, "buf19": {"Absolute": 401408}, "buf21": {"Absolute": 200704}, "buf22": {"Absolute": 200704}, "buf27": {"Absolute": 200704}, "buf30": {"Absolute": 100352}, "buf31": {"Absolute": 100352}, "buf36": {"Absolute": 100352}, "buf37": {"Absolute": 2048}, "buf39": {"Absolute": 4000}}, + "outputs": ["buf39"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg42_1"}, {"Ptr": "conv1_Conv_weight"}, {"Ptr": "conv1_Conv_bias"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": 
{"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf1"}, {"Ptr": "buf2"}, {"Ptr": "var_6"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_layer1_0_conv1_Conv_weight"}, {"Ptr": "layer1_layer1_0_conv1_Conv_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_layer1_0_conv2_Conv_weight"}, {"Ptr": "layer1_layer1_0_conv2_Conv_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf6"}, {"Ptr": "layer1_layer1_1_conv1_Conv_weight"}, {"Ptr": "layer1_layer1_1_conv1_Conv_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_layer1_1_conv2_Conv_weight"}, {"Ptr": "layer1_layer1_1_conv2_Conv_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + 
"grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "layer2_layer2_0_conv1_Conv_weight"}, {"Ptr": "layer2_layer2_0_conv1_Conv_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern007", [{"Ptr": "buf12"}, {"Ptr": "layer2_layer2_0_conv2_Conv_weight"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf2"}, {"Ptr": "layer2_layer2_0_downsample_downsample_0_Conv_weight"}, {"Ptr": "buf13"}, {"Ptr": "layer2_layer2_0_conv2_Conv_bias"}, {"Ptr": "layer2_layer2_0_downsample_downsample_0_Conv_bias"}, {"Ptr": "buf12"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf12"}, {"Ptr": "layer2_layer2_1_conv1_Conv_weight"}, {"Ptr": "layer2_layer2_1_conv1_Conv_bias"}, {"Ptr": "buf13"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf13"}, {"Ptr": "layer2_layer2_1_conv2_Conv_weight"}, {"Ptr": 
"layer2_layer2_1_conv2_Conv_bias"}, {"Ptr": "buf12"}, {"Ptr": "buf19"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf19"}, {"Ptr": "layer3_layer3_0_conv1_Conv_weight"}, {"Ptr": "layer3_layer3_0_conv1_Conv_bias"}, {"Ptr": "buf21"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf21"}, {"Ptr": "layer3_layer3_0_conv2_Conv_weight"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf19"}, {"Ptr": "layer3_layer3_0_downsample_downsample_0_Conv_weight"}, {"Ptr": "buf22"}, {"Ptr": "layer3_layer3_0_conv2_Conv_bias"}, {"Ptr": "layer3_layer3_0_downsample_downsample_0_Conv_bias"}, {"Ptr": "buf21"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf21"}, {"Ptr": "layer3_layer3_1_conv1_Conv_weight"}, {"Ptr": "layer3_layer3_1_conv1_Conv_bias"}, {"Ptr": "buf22"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": 
{"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf22"}, {"Ptr": "layer3_layer3_1_conv2_Conv_weight"}, {"Ptr": "buf27"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf21"}, {"Ptr": "buf27"}, {"Ptr": "layer3_layer3_1_conv2_Conv_bias"}, {"Ptr": "var_70"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf21"}, {"Ptr": "layer4_layer4_0_conv1_Conv_weight"}, {"Ptr": "layer4_layer4_0_conv1_Conv_bias"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf30"}, {"Ptr": "layer4_layer4_0_conv2_Conv_weight"}, {"Ptr": "buf31"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4096} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf21"}, {"Ptr": "layer4_layer4_0_downsample_downsample_0_Conv_weight"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf31"}, {"Ptr": 
"layer4_layer4_0_conv2_Conv_bias"}, {"Ptr": "buf30"}, {"Ptr": "layer4_layer4_0_downsample_downsample_0_Conv_bias"}, {"Ptr": "var_85"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf31"}, {"Ptr": "layer4_layer4_1_conv1_Conv_weight"}, {"Ptr": "layer4_layer4_1_conv1_Conv_bias"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf30"}, {"Ptr": "layer4_layer4_1_conv2_Conv_weight"}, {"Ptr": "buf36"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4096} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf37"}, {"Ptr": "buf36"}, {"Ptr": "layer4_layer4_1_conv2_Conv_bias"}, {"Ptr": "buf31"}, {"Ptr": "var_97"}, {"Ptr": "var_98"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4} + }]}, + {"ExecKernel": ["kern024", [{"Ptr": "fc_Gemm_bias"}, {"Ptr": "buf37"}, {"Ptr": "fc_Gemm_weight"}, {"Ptr": "buf39"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff 
--git a/machine_interface/tests/data/hip/test_gpu_resnet34.json b/machine_interface/tests/data/hip/test_gpu_resnet34.json new file mode 100644 index 00000000..497c0b8b --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet34.json @@ -0,0 +1,516 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet34/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco", "path": "hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco", "path": "hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco", "path": 
"hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet34/poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet34/poi_fused_add_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_16.hsaco", "path": "hip/resnet34/tem_fused_conv2d_16.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_17.hsaco", "path": "hip/resnet34/poi_fused_add_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet34/poi_fused_add_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet34/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "tem_fused_linear_21.hsaco", "path": "hip/resnet34/tem_fused_linear_21.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern008"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco", "kernel_name": "kern010"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern011"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern017"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern018"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern019"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern021"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern023"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern040"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern041"}, + {"module_name": "tem_fused_conv2d_16.hsaco", "kernel_name": "kern042"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern043"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern044"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern046"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern049"}, + {"module_name": "tem_fused_linear_21.hsaco", "kernel_name": "kern050"} + ], + "blueprint": { + "inputs": ["arg218_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_9", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", 
"layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "layer1_2_conv1_weight", "layer1_2_bn1_running_mean", "layer1_2_bn1_running_var", "layer1_2_bn1_weight", "layer1_2_bn1_bias", "layer1_2_conv2_weight", "layer1_2_bn2_running_mean", "layer1_2_bn2_running_var", "layer1_2_bn2_weight", "layer1_2_bn2_bias", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "layer2_2_conv1_weight", "layer2_2_bn1_running_mean", "layer2_2_bn1_running_var", "layer2_2_bn1_weight", "layer2_2_bn1_bias", "layer2_2_conv2_weight", "layer2_2_bn2_running_mean", "layer2_2_bn2_running_var", "layer2_2_bn2_weight", "layer2_2_bn2_bias", "layer2_3_conv1_weight", "layer2_3_bn1_running_mean", "layer2_3_bn1_running_var", "layer2_3_bn1_weight", "layer2_3_bn1_bias", "layer2_3_conv2_weight", "layer2_3_bn2_running_mean", "layer2_3_bn2_running_var", "layer2_3_bn2_weight", "layer2_3_bn2_bias", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", 
"layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_152", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_162", "layer3_2_conv1_weight", "layer3_2_bn1_running_mean", "layer3_2_bn1_running_var", "layer3_2_bn1_weight", "layer3_2_bn1_bias", "var_171", "layer3_2_conv2_weight", "layer3_2_bn2_running_mean", "layer3_2_bn2_running_var", "layer3_2_bn2_weight", "layer3_2_bn2_bias", "var_181", "layer3_3_conv1_weight", "layer3_3_bn1_running_mean", "layer3_3_bn1_running_var", "layer3_3_bn1_weight", "layer3_3_bn1_bias", "var_190", "layer3_3_conv2_weight", "layer3_3_bn2_running_mean", "layer3_3_bn2_running_var", "layer3_3_bn2_weight", "layer3_3_bn2_bias", "var_200", "layer3_4_conv1_weight", "layer3_4_bn1_running_mean", "layer3_4_bn1_running_var", "layer3_4_bn1_weight", "layer3_4_bn1_bias", "var_209", "layer3_4_conv2_weight", "layer3_4_bn2_running_mean", "layer3_4_bn2_running_var", "layer3_4_bn2_weight", "layer3_4_bn2_bias", "var_219", "layer3_5_conv1_weight", "layer3_5_bn1_running_mean", "layer3_5_bn1_running_var", "layer3_5_bn1_weight", "layer3_5_bn1_bias", "var_228", "layer3_5_conv2_weight", "layer3_5_bn2_running_mean", "layer3_5_bn2_running_var", "layer3_5_bn2_weight", "layer3_5_bn2_bias", "var_238", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "var_262", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", 
"layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_279", "layer4_2_conv1_weight", "layer4_2_bn1_running_mean", "layer4_2_bn1_running_var", "layer4_2_bn1_weight", "layer4_2_bn1_bias", "layer4_2_conv2_weight", "layer4_2_bn2_running_mean", "layer4_2_bn2_running_var", "layer4_2_bn2_weight", "layer4_2_bn2_bias", "var_297", "var_298", "fc_bias", "fc_weight"], + "buffers": {"buf1": {"Absolute": 3211264}, "buf2": {"Absolute": 802816}, "buf4": {"Absolute": 802816}, "buf6": {"Absolute": 802816}, "buf16": {"Absolute": 401408}, "buf17": {"Absolute": 401408}, "buf24": {"Absolute": 401408}, "buf34": {"Absolute": 200704}, "buf35": {"Absolute": 200704}, "buf41": {"Absolute": 200704}, "buf60": {"Absolute": 100352}, "buf61": {"Absolute": 100352}, "buf67": {"Absolute": 100352}, "buf72": {"Absolute": 2048}, "buf74": {"Absolute": 4000}}, + "outputs": ["buf74"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg218_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf1"}, {"Ptr": "buf2"}, {"Ptr": "var_9"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, 
{"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf6"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_2_conv1_weight"}, {"Ptr": "layer1_2_bn1_running_mean"}, {"Ptr": "layer1_2_bn1_running_var"}, {"Ptr": "layer1_2_bn1_weight"}, 
{"Ptr": "layer1_2_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_2_conv2_weight"}, {"Ptr": "layer1_2_bn2_running_mean"}, {"Ptr": "layer1_2_bn2_running_var"}, {"Ptr": "layer1_2_bn2_weight"}, {"Ptr": "layer1_2_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf6"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf16"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf17"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf6"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf17"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, 
{"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf16"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf17"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf17"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "buf16"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf24"}, {"Ptr": "layer2_2_conv1_weight"}, {"Ptr": "layer2_2_bn1_running_mean"}, {"Ptr": "layer2_2_bn1_running_var"}, {"Ptr": "layer2_2_bn1_weight"}, {"Ptr": "layer2_2_bn1_bias"}, {"Ptr": "buf17"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf17"}, {"Ptr": "layer2_2_conv2_weight"}, {"Ptr": "layer2_2_bn2_running_mean"}, {"Ptr": 
"layer2_2_bn2_running_var"}, {"Ptr": "layer2_2_bn2_weight"}, {"Ptr": "layer2_2_bn2_bias"}, {"Ptr": "buf24"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf16"}, {"Ptr": "layer2_3_conv1_weight"}, {"Ptr": "layer2_3_bn1_running_mean"}, {"Ptr": "layer2_3_bn1_running_var"}, {"Ptr": "layer2_3_bn1_weight"}, {"Ptr": "layer2_3_bn1_bias"}, {"Ptr": "buf17"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf17"}, {"Ptr": "layer2_3_conv2_weight"}, {"Ptr": "layer2_3_bn2_running_mean"}, {"Ptr": "layer2_3_bn2_running_var"}, {"Ptr": "layer2_3_bn2_weight"}, {"Ptr": "layer2_3_bn2_bias"}, {"Ptr": "buf16"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf24"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": 
{"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf24"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf35"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf35"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_152"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf35"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_162"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer3_2_conv1_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_bn1_running_mean"}, {"Ptr": "layer3_2_bn1_running_var"}, {"Ptr": "layer3_2_bn1_weight"}, {"Ptr": "layer3_2_bn1_bias"}, {"Ptr": "var_171"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf34"}, {"Ptr": "buf35"}, {"Ptr": "layer3_2_bn2_running_mean"}, {"Ptr": "layer3_2_bn2_running_var"}, {"Ptr": "layer3_2_bn2_weight"}, {"Ptr": "layer3_2_bn2_bias"}, {"Ptr": "var_181"}], { + "grid_dim_x": {"Absolute": 98}, 
+ "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer3_3_conv1_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf35"}, {"Ptr": "layer3_3_bn1_running_mean"}, {"Ptr": "layer3_3_bn1_running_var"}, {"Ptr": "layer3_3_bn1_weight"}, {"Ptr": "layer3_3_bn1_bias"}, {"Ptr": "var_190"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf35"}, {"Ptr": "layer3_3_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_3_bn2_running_mean"}, {"Ptr": "layer3_3_bn2_running_var"}, {"Ptr": "layer3_3_bn2_weight"}, {"Ptr": "layer3_3_bn2_bias"}, {"Ptr": "var_200"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer3_4_conv1_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 4}, + 
"grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_bn1_running_mean"}, {"Ptr": "layer3_4_bn1_running_var"}, {"Ptr": "layer3_4_bn1_weight"}, {"Ptr": "layer3_4_bn1_bias"}, {"Ptr": "var_209"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf34"}, {"Ptr": "buf35"}, {"Ptr": "layer3_4_bn2_running_mean"}, {"Ptr": "layer3_4_bn2_running_var"}, {"Ptr": "layer3_4_bn2_weight"}, {"Ptr": "layer3_4_bn2_bias"}, {"Ptr": "var_219"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer3_5_conv1_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf35"}, {"Ptr": "layer3_5_bn1_running_mean"}, {"Ptr": "layer3_5_bn1_running_var"}, {"Ptr": 
"layer3_5_bn1_weight"}, {"Ptr": "layer3_5_bn1_bias"}, {"Ptr": "var_228"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf35"}, {"Ptr": "layer3_5_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_5_bn2_running_mean"}, {"Ptr": "layer3_5_bn2_running_var"}, {"Ptr": "layer3_5_bn2_weight"}, {"Ptr": "layer3_5_bn2_bias"}, {"Ptr": "var_238"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern040", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "buf60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern041", [{"Ptr": "buf60"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4096} + 
}]}, + {"ExecKernel": ["kern042", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf61"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "buf60"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "var_262"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern044", [{"Ptr": "buf61"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "buf60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern041", [{"Ptr": "buf60"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4096} + }]}, + {"ExecKernel": ["kern046", [{"Ptr": "buf61"}, {"Ptr": "buf67"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": 
"layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "var_279"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern044", [{"Ptr": "buf61"}, {"Ptr": "layer4_2_conv1_weight"}, {"Ptr": "layer4_2_bn1_running_mean"}, {"Ptr": "layer4_2_bn1_running_var"}, {"Ptr": "layer4_2_bn1_weight"}, {"Ptr": "layer4_2_bn1_bias"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern041", [{"Ptr": "buf67"}, {"Ptr": "layer4_2_conv2_weight"}, {"Ptr": "buf60"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4096} + }]}, + {"ExecKernel": ["kern049", [{"Ptr": "buf72"}, {"Ptr": "buf60"}, {"Ptr": "layer4_2_bn2_running_mean"}, {"Ptr": "layer4_2_bn2_running_var"}, {"Ptr": "layer4_2_bn2_weight"}, {"Ptr": "layer4_2_bn2_bias"}, {"Ptr": "buf61"}, {"Ptr": "var_297"}, {"Ptr": "var_298"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4} + }]}, + {"ExecKernel": ["kern050", [{"Ptr": "fc_bias"}, {"Ptr": "buf72"}, {"Ptr": "fc_weight"}, {"Ptr": "buf74"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet34batch16.json b/machine_interface/tests/data/hip/test_gpu_resnet34batch16.json new file mode 100644 index 00000000..c5885302 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet34batch16.json @@ -0,0 +1,648 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_0.hsaco"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet34batch16/poi_fused_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet34batch16/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_3.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_3.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_8.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_8.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", 
"path": "hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet34batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_17.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_conv2d_20.hsaco", "path": "hip/resnet34batch16/tem_fused_conv2d_20.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_22.hsaco", "path": "hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_22.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco", "path": "hip/resnet34batch16/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco"}, + {"module_name": "tem_fused_linear_24.hsaco", "path": "hip/resnet34batch16/tem_fused_linear_24.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_0.hsaco", 
"kernel_name": "kern000"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "kernel_name": "kern015"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern016"}, + {"module_name": "tem_fused_conv2d_8.hsaco", "kernel_name": "kern017"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern018"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern019"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern021"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern028"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern029"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern030"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern032"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern034"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "kernel_name": "kern051"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern052"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern053"}, + {"module_name": "tem_fused_conv2d_20.hsaco", "kernel_name": "kern054"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern055"}, + {"module_name": 
"poi_fused_add_miopen_batch_norm_relu_22.hsaco", "kernel_name": "kern059"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco", "kernel_name": "kern063"}, + {"module_name": "tem_fused_linear_24.hsaco", "kernel_name": "kern064"} + ], + "blueprint": { + "inputs": ["arg218_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_8", "var_11", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "var_20", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "var_30", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "var_39", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "var_49", "layer1_2_conv1_weight", "layer1_2_bn1_running_mean", "layer1_2_bn1_running_var", "layer1_2_bn1_weight", "layer1_2_bn1_bias", "var_58", "layer1_2_conv2_weight", "layer1_2_bn2_running_mean", "layer1_2_bn2_running_var", "layer1_2_bn2_weight", "layer1_2_bn2_bias", "var_68", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "var_92", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "var_109", "layer2_2_conv1_weight", "layer2_2_bn1_running_mean", 
"layer2_2_bn1_running_var", "layer2_2_bn1_weight", "layer2_2_bn1_bias", "layer2_2_conv2_weight", "layer2_2_bn2_running_mean", "layer2_2_bn2_running_var", "layer2_2_bn2_weight", "layer2_2_bn2_bias", "var_126", "layer2_3_conv1_weight", "layer2_3_bn1_running_mean", "layer2_3_bn1_running_var", "layer2_3_bn1_weight", "layer2_3_bn1_bias", "layer2_3_conv2_weight", "layer2_3_bn2_running_mean", "layer2_3_bn2_running_var", "layer2_3_bn2_weight", "layer2_3_bn2_bias", "var_143", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_174", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_184", "layer3_2_conv1_weight", "layer3_2_bn1_running_mean", "layer3_2_bn1_running_var", "layer3_2_bn1_weight", "layer3_2_bn1_bias", "var_193", "layer3_2_conv2_weight", "layer3_2_bn2_running_mean", "layer3_2_bn2_running_var", "layer3_2_bn2_weight", "layer3_2_bn2_bias", "var_203", "layer3_3_conv1_weight", "layer3_3_bn1_running_mean", "layer3_3_bn1_running_var", "layer3_3_bn1_weight", "layer3_3_bn1_bias", "var_212", "layer3_3_conv2_weight", "layer3_3_bn2_running_mean", "layer3_3_bn2_running_var", "layer3_3_bn2_weight", "layer3_3_bn2_bias", "var_222", "layer3_4_conv1_weight", "layer3_4_bn1_running_mean", "layer3_4_bn1_running_var", "layer3_4_bn1_weight", "layer3_4_bn1_bias", "var_231", "layer3_4_conv2_weight", "layer3_4_bn2_running_mean", "layer3_4_bn2_running_var", "layer3_4_bn2_weight", "layer3_4_bn2_bias", "var_241", 
"layer3_5_conv1_weight", "layer3_5_bn1_running_mean", "layer3_5_bn1_running_var", "layer3_5_bn1_weight", "layer3_5_bn1_bias", "var_250", "layer3_5_conv2_weight", "layer3_5_bn2_running_mean", "layer3_5_bn2_running_var", "layer3_5_bn2_weight", "layer3_5_bn2_bias", "var_260", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "var_269", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "var_286", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "var_295", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_305", "layer4_2_conv1_weight", "layer4_2_bn1_running_mean", "layer4_2_bn1_running_var", "layer4_2_bn1_weight", "layer4_2_bn1_bias", "var_314", "layer4_2_conv2_weight", "layer4_2_bn2_running_mean", "layer4_2_bn2_running_var", "layer4_2_bn2_weight", "layer4_2_bn2_bias", "var_325", "var_326", "fc_bias", "fc_weight"], + "buffers": {"buf0": {"Absolute": 51380224}, "buf2": {"Absolute": 12845056}, "buf3": {"Absolute": 12845056}, "buf5": {"Absolute": 12845056}, "buf16": {"Absolute": 6422528}, "buf17": {"Absolute": 6422528}, "buf23": {"Absolute": 6422528}, "buf34": {"Absolute": 3211264}, "buf35": {"Absolute": 3211264}, "buf41": {"Absolute": 3211264}, "buf59": {"Absolute": 1605632}, "buf61": {"Absolute": 1605632}, "buf67": {"Absolute": 1605632}, "buf72": {"Absolute": 32768}, "buf74": {"Absolute": 64000}}, + "outputs": ["buf74"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg218_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "var_8"}], { + "grid_dim_x": {"Absolute": 12544}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf0"}, {"Ptr": "buf2"}, {"Ptr": "var_11"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf3"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "var_20"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf3"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf5"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "var_30"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "var_39"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf3"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "var_49"}], { + "grid_dim_x": 
{"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_2_conv1_weight"}, {"Ptr": "buf3"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf3"}, {"Ptr": "layer1_2_bn1_running_mean"}, {"Ptr": "layer1_2_bn1_running_var"}, {"Ptr": "layer1_2_bn1_weight"}, {"Ptr": "layer1_2_bn1_bias"}, {"Ptr": "var_58"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf3"}, {"Ptr": "layer1_2_conv2_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf2"}, {"Ptr": "buf5"}, {"Ptr": "layer1_2_bn2_running_mean"}, {"Ptr": "layer1_2_bn2_running_var"}, {"Ptr": "layer1_2_bn2_weight"}, {"Ptr": "layer1_2_bn2_bias"}, {"Ptr": "var_68"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, 
{"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf16"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf17"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf17"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf16"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "var_92"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf17"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": 
{"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf16"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf17"}, {"Ptr": "buf23"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "var_109"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf17"}, {"Ptr": "layer2_2_conv1_weight"}, {"Ptr": "layer2_2_bn1_running_mean"}, {"Ptr": "layer2_2_bn1_running_var"}, {"Ptr": "layer2_2_bn1_weight"}, {"Ptr": "layer2_2_bn1_bias"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf23"}, {"Ptr": "layer2_2_conv2_weight"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf17"}, {"Ptr": "buf16"}, {"Ptr": 
"layer2_2_bn2_running_mean"}, {"Ptr": "layer2_2_bn2_running_var"}, {"Ptr": "layer2_2_bn2_weight"}, {"Ptr": "layer2_2_bn2_bias"}, {"Ptr": "var_126"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf17"}, {"Ptr": "layer2_3_conv1_weight"}, {"Ptr": "layer2_3_bn1_running_mean"}, {"Ptr": "layer2_3_bn1_running_var"}, {"Ptr": "layer2_3_bn1_weight"}, {"Ptr": "layer2_3_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf16"}, {"Ptr": "layer2_3_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf17"}, {"Ptr": "buf23"}, {"Ptr": "layer2_3_bn2_running_mean"}, {"Ptr": "layer2_3_bn2_running_var"}, {"Ptr": "layer2_3_bn2_weight"}, {"Ptr": "layer2_3_bn2_bias"}, {"Ptr": "var_143"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf17"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 
13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 34816} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf34"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern030", [{"Ptr": "buf17"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf35"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf34"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern032", [{"Ptr": "buf35"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_174"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf35"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern034", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_184"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf34"}, {"Ptr": "layer3_2_conv1_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern032", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_bn1_running_mean"}, {"Ptr": "layer3_2_bn1_running_var"}, {"Ptr": "layer3_2_bn1_weight"}, {"Ptr": "layer3_2_bn1_bias"}, {"Ptr": "var_193"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern034", [{"Ptr": "buf34"}, {"Ptr": "buf35"}, {"Ptr": "layer3_2_bn2_running_mean"}, {"Ptr": "layer3_2_bn2_running_var"}, {"Ptr": "layer3_2_bn2_weight"}, {"Ptr": "layer3_2_bn2_bias"}, {"Ptr": "var_203"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf34"}, {"Ptr": "layer3_3_conv1_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern032", [{"Ptr": "buf35"}, {"Ptr": "layer3_3_bn1_running_mean"}, {"Ptr": "layer3_3_bn1_running_var"}, {"Ptr": "layer3_3_bn1_weight"}, {"Ptr": "layer3_3_bn1_bias"}, {"Ptr": "var_212"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf35"}, {"Ptr": "layer3_3_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern034", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_3_bn2_running_mean"}, {"Ptr": "layer3_3_bn2_running_var"}, {"Ptr": "layer3_3_bn2_weight"}, {"Ptr": "layer3_3_bn2_bias"}, {"Ptr": "var_222"}], { + "grid_dim_x": 
{"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf34"}, {"Ptr": "layer3_4_conv1_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern032", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_bn1_running_mean"}, {"Ptr": "layer3_4_bn1_running_var"}, {"Ptr": "layer3_4_bn1_weight"}, {"Ptr": "layer3_4_bn1_bias"}, {"Ptr": "var_231"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern034", [{"Ptr": "buf34"}, {"Ptr": "buf35"}, {"Ptr": "layer3_4_bn2_running_mean"}, {"Ptr": "layer3_4_bn2_running_var"}, {"Ptr": "layer3_4_bn2_weight"}, {"Ptr": "layer3_4_bn2_bias"}, {"Ptr": "var_241"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf34"}, {"Ptr": "layer3_5_conv1_weight"}, {"Ptr": "buf35"}], { + 
"grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern032", [{"Ptr": "buf35"}, {"Ptr": "layer3_5_bn1_running_mean"}, {"Ptr": "layer3_5_bn1_running_var"}, {"Ptr": "layer3_5_bn1_weight"}, {"Ptr": "layer3_5_bn1_bias"}, {"Ptr": "var_250"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf35"}, {"Ptr": "layer3_5_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern034", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_5_bn2_running_mean"}, {"Ptr": "layer3_5_bn2_running_var"}, {"Ptr": "layer3_5_bn2_weight"}, {"Ptr": "layer3_5_bn2_bias"}, {"Ptr": "var_260"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern051", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern052", [{"Ptr": "buf59"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": 
"layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "var_269"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "buf59"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern054", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern055", [{"Ptr": "buf61"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "buf59"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "var_286"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "buf61"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern052", [{"Ptr": "buf59"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "var_295"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "buf59"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern059", [{"Ptr": "buf61"}, {"Ptr": "buf67"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "var_305"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "buf61"}, {"Ptr": "layer4_2_conv1_weight"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern052", [{"Ptr": "buf67"}, {"Ptr": "layer4_2_bn1_running_mean"}, {"Ptr": "layer4_2_bn1_running_var"}, {"Ptr": "layer4_2_bn1_weight"}, {"Ptr": "layer4_2_bn1_bias"}, {"Ptr": "var_314"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": 
{"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "buf67"}, {"Ptr": "layer4_2_conv2_weight"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern063", [{"Ptr": "buf72"}, {"Ptr": "buf59"}, {"Ptr": "layer4_2_bn2_running_mean"}, {"Ptr": "layer4_2_bn2_running_var"}, {"Ptr": "layer4_2_bn2_weight"}, {"Ptr": "layer4_2_bn2_bias"}, {"Ptr": "buf61"}, {"Ptr": "var_325"}, {"Ptr": "var_326"}], { + "grid_dim_x": {"Absolute": 1024}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern064", [{"Ptr": "fc_bias"}, {"Ptr": "buf72"}, {"Ptr": "fc_weight"}, {"Ptr": "buf74"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet34batch2.json b/machine_interface/tests/data/hip/test_gpu_resnet34batch2.json new file mode 100644 index 00000000..7775dbc5 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet34batch2.json @@ -0,0 +1,543 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", 
"path": "hip/resnet34batch2/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco", "path": "hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_6.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet34batch2/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco", "path": "hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet34batch2/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "tem_fused_linear_21.hsaco", "path": "hip/resnet34batch2/tem_fused_linear_21.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern008"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "kernel_name": "kern010"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern011"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern012"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern014"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern021"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern022"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern023"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern025"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern027"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern044"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern045"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern046"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern047"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern049"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern052"}, + {"module_name": "tem_fused_linear_21.hsaco", "kernel_name": "kern053"} + ], + "blueprint": { + "inputs": ["arg218_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_9", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "layer1_2_conv1_weight", "layer1_2_bn1_running_mean", "layer1_2_bn1_running_var", "layer1_2_bn1_weight", "layer1_2_bn1_bias", "layer1_2_conv2_weight", "layer1_2_bn2_running_mean", "layer1_2_bn2_running_var", 
"layer1_2_bn2_weight", "layer1_2_bn2_bias", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "var_78", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "var_95", "layer2_2_conv1_weight", "layer2_2_bn1_running_mean", "layer2_2_bn1_running_var", "layer2_2_bn1_weight", "layer2_2_bn1_bias", "layer2_2_conv2_weight", "layer2_2_bn2_running_mean", "layer2_2_bn2_running_var", "layer2_2_bn2_weight", "layer2_2_bn2_bias", "var_112", "layer2_3_conv1_weight", "layer2_3_bn1_running_mean", "layer2_3_bn1_running_var", "layer2_3_bn1_weight", "layer2_3_bn1_bias", "layer2_3_conv2_weight", "layer2_3_bn2_running_mean", "layer2_3_bn2_running_var", "layer2_3_bn2_weight", "layer2_3_bn2_bias", "var_129", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_160", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_170", "layer3_2_conv1_weight", 
"layer3_2_bn1_running_mean", "layer3_2_bn1_running_var", "layer3_2_bn1_weight", "layer3_2_bn1_bias", "var_179", "layer3_2_conv2_weight", "layer3_2_bn2_running_mean", "layer3_2_bn2_running_var", "layer3_2_bn2_weight", "layer3_2_bn2_bias", "var_189", "layer3_3_conv1_weight", "layer3_3_bn1_running_mean", "layer3_3_bn1_running_var", "layer3_3_bn1_weight", "layer3_3_bn1_bias", "var_198", "layer3_3_conv2_weight", "layer3_3_bn2_running_mean", "layer3_3_bn2_running_var", "layer3_3_bn2_weight", "layer3_3_bn2_bias", "var_208", "layer3_4_conv1_weight", "layer3_4_bn1_running_mean", "layer3_4_bn1_running_var", "layer3_4_bn1_weight", "layer3_4_bn1_bias", "var_217", "layer3_4_conv2_weight", "layer3_4_bn2_running_mean", "layer3_4_bn2_running_var", "layer3_4_bn2_weight", "layer3_4_bn2_bias", "var_227", "layer3_5_conv1_weight", "layer3_5_bn1_running_mean", "layer3_5_bn1_running_var", "layer3_5_bn1_weight", "layer3_5_bn1_bias", "var_236", "layer3_5_conv2_weight", "layer3_5_bn2_running_mean", "layer3_5_bn2_running_var", "layer3_5_bn2_weight", "layer3_5_bn2_bias", "var_246", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_285", "layer4_2_conv1_weight", "layer4_2_bn1_running_mean", "layer4_2_bn1_running_var", "layer4_2_bn1_weight", "layer4_2_bn1_bias", "layer4_2_conv2_weight", "layer4_2_bn2_running_mean", "layer4_2_bn2_running_var", "layer4_2_bn2_weight", 
"layer4_2_bn2_bias", "var_303", "var_304", "fc_bias", "fc_weight"], + "buffers": {"buf1": {"Absolute": 6422528}, "buf2": {"Absolute": 1605632}, "buf4": {"Absolute": 1605632}, "buf6": {"Absolute": 1605632}, "buf16": {"Absolute": 802816}, "buf17": {"Absolute": 802816}, "buf23": {"Absolute": 802816}, "buf34": {"Absolute": 401408}, "buf35": {"Absolute": 401408}, "buf41": {"Absolute": 401408}, "buf60": {"Absolute": 200704}, "buf61": {"Absolute": 200704}, "buf67": {"Absolute": 200704}, "buf72": {"Absolute": 4096}, "buf74": {"Absolute": 8000}}, + "outputs": ["buf74"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg218_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf1"}, {"Ptr": "buf2"}, {"Ptr": "var_9"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": 
"layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf6"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_2_conv1_weight"}, {"Ptr": "layer1_2_bn1_running_mean"}, {"Ptr": "layer1_2_bn1_running_var"}, {"Ptr": "layer1_2_bn1_weight"}, {"Ptr": "layer1_2_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_2_conv2_weight"}, {"Ptr": "layer1_2_bn2_running_mean"}, {"Ptr": 
"layer1_2_bn2_running_var"}, {"Ptr": "layer1_2_bn2_weight"}, {"Ptr": "layer1_2_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf6"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf16"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf17"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf6"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf17"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf16"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "var_78"}], { + 
"grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf17"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf16"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf17"}, {"Ptr": "buf23"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "var_95"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf17"}, {"Ptr": "layer2_2_conv1_weight"}, {"Ptr": "layer2_2_bn1_running_mean"}, {"Ptr": "layer2_2_bn1_running_var"}, {"Ptr": "layer2_2_bn1_weight"}, {"Ptr": "layer2_2_bn1_bias"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf23"}, {"Ptr": "layer2_2_conv2_weight"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf17"}, {"Ptr": "buf16"}, {"Ptr": "layer2_2_bn2_running_mean"}, {"Ptr": "layer2_2_bn2_running_var"}, {"Ptr": "layer2_2_bn2_weight"}, {"Ptr": "layer2_2_bn2_bias"}, {"Ptr": "var_112"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf17"}, {"Ptr": "layer2_3_conv1_weight"}, {"Ptr": "layer2_3_bn1_running_mean"}, {"Ptr": "layer2_3_bn1_running_var"}, {"Ptr": "layer2_3_bn1_weight"}, {"Ptr": "layer2_3_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern009", [{"Ptr": "buf16"}, {"Ptr": "layer2_3_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf17"}, {"Ptr": "buf23"}, {"Ptr": "layer2_3_bn2_running_mean"}, {"Ptr": "layer2_3_bn2_running_var"}, {"Ptr": "layer2_3_bn2_weight"}, {"Ptr": "layer2_3_bn2_bias"}, {"Ptr": "var_129"}], { + "grid_dim_x": {"Absolute": 
196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf17"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf34"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf17"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf35"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf34"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf35"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_160"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf35"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_170"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf34"}, {"Ptr": "layer3_2_conv1_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_bn1_running_mean"}, {"Ptr": "layer3_2_bn1_running_var"}, {"Ptr": "layer3_2_bn1_weight"}, {"Ptr": "layer3_2_bn1_bias"}, {"Ptr": "var_179"}], { + "grid_dim_x": {"Absolute": 98}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf34"}, {"Ptr": "buf35"}, {"Ptr": "layer3_2_bn2_running_mean"}, {"Ptr": "layer3_2_bn2_running_var"}, {"Ptr": "layer3_2_bn2_weight"}, {"Ptr": "layer3_2_bn2_bias"}, {"Ptr": "var_189"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf34"}, {"Ptr": "layer3_3_conv1_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf35"}, {"Ptr": "layer3_3_bn1_running_mean"}, {"Ptr": "layer3_3_bn1_running_var"}, {"Ptr": "layer3_3_bn1_weight"}, {"Ptr": "layer3_3_bn1_bias"}, {"Ptr": "var_198"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf35"}, {"Ptr": "layer3_3_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 7}, + 
"grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_3_bn2_running_mean"}, {"Ptr": "layer3_3_bn2_running_var"}, {"Ptr": "layer3_3_bn2_weight"}, {"Ptr": "layer3_3_bn2_bias"}, {"Ptr": "var_208"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf34"}, {"Ptr": "layer3_4_conv1_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_bn1_running_mean"}, {"Ptr": "layer3_4_bn1_running_var"}, {"Ptr": "layer3_4_bn1_weight"}, {"Ptr": "layer3_4_bn1_bias"}, {"Ptr": "var_217"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf34"}, {"Ptr": "buf35"}, {"Ptr": "layer3_4_bn2_running_mean"}, {"Ptr": "layer3_4_bn2_running_var"}, 
{"Ptr": "layer3_4_bn2_weight"}, {"Ptr": "layer3_4_bn2_bias"}, {"Ptr": "var_227"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf34"}, {"Ptr": "layer3_5_conv1_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf35"}, {"Ptr": "layer3_5_bn1_running_mean"}, {"Ptr": "layer3_5_bn1_running_var"}, {"Ptr": "layer3_5_bn1_weight"}, {"Ptr": "layer3_5_bn1_bias"}, {"Ptr": "var_236"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern022", [{"Ptr": "buf35"}, {"Ptr": "layer3_5_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_5_bn2_running_mean"}, {"Ptr": "layer3_5_bn2_running_var"}, {"Ptr": "layer3_5_bn2_weight"}, {"Ptr": "layer3_5_bn2_bias"}, {"Ptr": "var_246"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": 
["kern044", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "buf60"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf60"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern046", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf61"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "buf60"}], { + "grid_dim_x": {"Absolute": 2}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern047", [{"Ptr": "buf60"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 
10240} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf61"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern049", [{"Ptr": "buf60"}, {"Ptr": "buf67"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "var_285"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern047", [{"Ptr": "buf60"}, {"Ptr": "layer4_2_conv1_weight"}, {"Ptr": "layer4_2_bn1_running_mean"}, {"Ptr": "layer4_2_bn1_running_var"}, {"Ptr": "layer4_2_bn1_weight"}, {"Ptr": "layer4_2_bn1_bias"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 10240} + }]}, + {"ExecKernel": ["kern045", [{"Ptr": "buf67"}, {"Ptr": "layer4_2_conv2_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern052", [{"Ptr": "buf72"}, {"Ptr": "buf61"}, {"Ptr": "layer4_2_bn2_running_mean"}, {"Ptr": "layer4_2_bn2_running_var"}, {"Ptr": "layer4_2_bn2_weight"}, {"Ptr": "layer4_2_bn2_bias"}, {"Ptr": "buf60"}, {"Ptr": "var_303"}, {"Ptr": "var_304"}], { + "grid_dim_x": {"Absolute": 1024}, + 
"grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 64}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 4} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "fc_bias"}, {"Ptr": "buf72"}, {"Ptr": "fc_weight"}, {"Ptr": "buf74"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet34batch4.json b/machine_interface/tests/data/hip/test_gpu_resnet34batch4.json new file mode 100644 index 00000000..0e60f8e9 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet34batch4.json @@ -0,0 +1,545 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_0.hsaco"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet34batch4/poi_fused_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet34batch4/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_6.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco", "path": 
"hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "tem_fused_conv2d_12.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_12.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet34batch4/poi_fused_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_17.hsaco", "path": "hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet34batch4/poi_fused_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": 
"per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet34batch4/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "tem_fused_linear_22.hsaco", "path": "hip/resnet34batch4/tem_fused_linear_22.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern009"}, + {"module_name": "tem_fused_conv2d_6.hsaco", "kernel_name": "kern010"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern011"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern018"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_conv2d_12.hsaco", "kernel_name": "kern020"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern021"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern023"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern025"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern042"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern043"}, + {"module_name": 
"tem_fused_add_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern044"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern046"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern048"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern052"}, + {"module_name": "tem_fused_linear_22.hsaco", "kernel_name": "kern053"} + ], + "blueprint": { + "inputs": ["arg218_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_8", "var_11", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "layer1_2_conv1_weight", "layer1_2_bn1_running_mean", "layer1_2_bn1_running_var", "layer1_2_bn1_weight", "layer1_2_bn1_bias", "layer1_2_conv2_weight", "layer1_2_bn2_running_mean", "layer1_2_bn2_running_var", "layer1_2_bn2_weight", "layer1_2_bn2_bias", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_downsample_0_weight", "layer2_0_conv2_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", 
"layer2_1_bn2_weight", "layer2_1_bn2_bias", "layer2_2_conv1_weight", "layer2_2_bn1_running_mean", "layer2_2_bn1_running_var", "layer2_2_bn1_weight", "layer2_2_bn1_bias", "layer2_2_conv2_weight", "layer2_2_bn2_running_mean", "layer2_2_bn2_running_var", "layer2_2_bn2_weight", "layer2_2_bn2_bias", "layer2_3_conv1_weight", "layer2_3_bn1_running_mean", "layer2_3_bn1_running_var", "layer2_3_bn1_weight", "layer2_3_bn1_bias", "layer2_3_conv2_weight", "layer2_3_bn2_running_mean", "layer2_3_bn2_running_var", "layer2_3_bn2_weight", "layer2_3_bn2_bias", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "var_147", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_156", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_166", "layer3_2_conv1_weight", "layer3_2_bn1_running_mean", "layer3_2_bn1_running_var", "layer3_2_bn1_weight", "layer3_2_bn1_bias", "var_175", "layer3_2_conv2_weight", "layer3_2_bn2_running_mean", "layer3_2_bn2_running_var", "layer3_2_bn2_weight", "layer3_2_bn2_bias", "var_185", "layer3_3_conv1_weight", "layer3_3_bn1_running_mean", "layer3_3_bn1_running_var", "layer3_3_bn1_weight", "layer3_3_bn1_bias", "var_194", "layer3_3_conv2_weight", "layer3_3_bn2_running_mean", "layer3_3_bn2_running_var", "layer3_3_bn2_weight", "layer3_3_bn2_bias", "var_204", "layer3_4_conv1_weight", "layer3_4_bn1_running_mean", "layer3_4_bn1_running_var", "layer3_4_bn1_weight", "layer3_4_bn1_bias", "var_213", "layer3_4_conv2_weight", "layer3_4_bn2_running_mean", 
"layer3_4_bn2_running_var", "layer3_4_bn2_weight", "layer3_4_bn2_bias", "var_223", "layer3_5_conv1_weight", "layer3_5_bn1_running_mean", "layer3_5_bn1_running_var", "layer3_5_bn1_weight", "layer3_5_bn1_bias", "var_232", "layer3_5_conv2_weight", "layer3_5_bn2_running_mean", "layer3_5_bn2_running_var", "layer3_5_bn2_weight", "layer3_5_bn2_bias", "var_242", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "var_273", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_283", "layer4_2_conv1_weight", "layer4_2_bn1_running_mean", "layer4_2_bn1_running_var", "layer4_2_bn1_weight", "layer4_2_bn1_bias", "var_292", "layer4_2_conv2_weight", "layer4_2_bn2_running_mean", "layer4_2_bn2_running_var", "layer4_2_bn2_weight", "layer4_2_bn2_bias", "var_303", "var_304", "fc_bias", "fc_weight"], + "buffers": {"buf0": {"Absolute": 12845056}, "buf2": {"Absolute": 3211264}, "buf4": {"Absolute": 3211264}, "buf6": {"Absolute": 3211264}, "buf16": {"Absolute": 1605632}, "buf18": {"Absolute": 1605632}, "buf20": {"Absolute": 1605632}, "buf34": {"Absolute": 802816}, "buf35": {"Absolute": 802816}, "buf41": {"Absolute": 802816}, "buf60": {"Absolute": 401408}, "buf61": {"Absolute": 401408}, "buf67": {"Absolute": 401408}, "buf72": {"Absolute": 8192}, "buf74": {"Absolute": 16000}}, + "outputs": ["buf74"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg218_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": 
{"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "var_8"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf0"}, {"Ptr": "buf2"}, {"Ptr": "var_11"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 33024} + }]}, + 
{"ExecKernel": ["kern003", [{"Ptr": "buf6"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf4"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 33024} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_2_conv1_weight"}, {"Ptr": "layer1_2_bn1_running_mean"}, {"Ptr": "layer1_2_bn1_running_var"}, {"Ptr": "layer1_2_bn1_weight"}, {"Ptr": "layer1_2_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf4"}, {"Ptr": "layer1_2_conv2_weight"}, {"Ptr": "layer1_2_bn2_running_mean"}, {"Ptr": "layer1_2_bn2_running_var"}, {"Ptr": "layer1_2_bn2_weight"}, {"Ptr": "layer1_2_bn2_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 33024} + }]}, + 
{"ExecKernel": ["kern009", [{"Ptr": "buf6"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern010", [{"Ptr": "buf6"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf18"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern011", [{"Ptr": "buf16"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf18"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "buf20"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf20"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf18"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf18"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "buf20"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf16"}, {"Ptr": "layer2_2_conv1_weight"}, {"Ptr": "layer2_2_bn1_running_mean"}, {"Ptr": "layer2_2_bn1_running_var"}, {"Ptr": "layer2_2_bn1_weight"}, {"Ptr": "layer2_2_bn1_bias"}, {"Ptr": "buf18"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf18"}, {"Ptr": "layer2_2_conv2_weight"}, {"Ptr": "layer2_2_bn2_running_mean"}, {"Ptr": "layer2_2_bn2_running_var"}, {"Ptr": "layer2_2_bn2_weight"}, {"Ptr": "layer2_2_bn2_bias"}, {"Ptr": "buf16"}, {"Ptr": "buf20"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf20"}, {"Ptr": "layer2_3_conv1_weight"}, {"Ptr": "layer2_3_bn1_running_mean"}, {"Ptr": "layer2_3_bn1_running_var"}, {"Ptr": "layer2_3_bn1_weight"}, {"Ptr": "layer2_3_bn1_bias"}, {"Ptr": "buf18"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf18"}, {"Ptr": "layer2_3_conv2_weight"}, {"Ptr": "layer2_3_bn2_running_mean"}, {"Ptr": "layer2_3_bn2_running_var"}, {"Ptr": "layer2_3_bn2_weight"}, {"Ptr": "layer2_3_bn2_bias"}, {"Ptr": "buf20"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf16"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf34"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern020", [{"Ptr": "buf16"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern021", [{"Ptr": "buf35"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "buf34"}, {"Ptr": 
"layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "var_147"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf35"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf34"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_156"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf34"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf35"}, {"Ptr": "buf41"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": "var_166"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 
1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf35"}, {"Ptr": "layer3_2_conv1_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_bn1_running_mean"}, {"Ptr": "layer3_2_bn1_running_var"}, {"Ptr": "layer3_2_bn1_weight"}, {"Ptr": "layer3_2_bn1_bias"}, {"Ptr": "var_175"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_conv2_weight"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf35"}, {"Ptr": "buf34"}, {"Ptr": "layer3_2_bn2_running_mean"}, {"Ptr": "layer3_2_bn2_running_var"}, {"Ptr": "layer3_2_bn2_weight"}, {"Ptr": "layer3_2_bn2_bias"}, {"Ptr": "var_185"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf35"}, {"Ptr": "layer3_3_conv1_weight"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, 
+ "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf34"}, {"Ptr": "layer3_3_bn1_running_mean"}, {"Ptr": "layer3_3_bn1_running_var"}, {"Ptr": "layer3_3_bn1_weight"}, {"Ptr": "layer3_3_bn1_bias"}, {"Ptr": "var_194"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf34"}, {"Ptr": "layer3_3_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf35"}, {"Ptr": "buf41"}, {"Ptr": "layer3_3_bn2_running_mean"}, {"Ptr": "layer3_3_bn2_running_var"}, {"Ptr": "layer3_3_bn2_weight"}, {"Ptr": "layer3_3_bn2_bias"}, {"Ptr": "var_204"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf35"}, {"Ptr": "layer3_4_conv1_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_bn1_running_mean"}, {"Ptr": "layer3_4_bn1_running_var"}, {"Ptr": "layer3_4_bn1_weight"}, {"Ptr": "layer3_4_bn1_bias"}, {"Ptr": "var_213"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 
1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_conv2_weight"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf35"}, {"Ptr": "buf34"}, {"Ptr": "layer3_4_bn2_running_mean"}, {"Ptr": "layer3_4_bn2_running_var"}, {"Ptr": "layer3_4_bn2_weight"}, {"Ptr": "layer3_4_bn2_bias"}, {"Ptr": "var_223"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf35"}, {"Ptr": "layer3_5_conv1_weight"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern023", [{"Ptr": "buf34"}, {"Ptr": "layer3_5_bn1_running_mean"}, {"Ptr": "layer3_5_bn1_running_var"}, {"Ptr": "layer3_5_bn1_weight"}, {"Ptr": "layer3_5_bn1_bias"}, {"Ptr": "var_232"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf34"}, {"Ptr": "layer3_5_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, 
+ "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf35"}, {"Ptr": "buf41"}, {"Ptr": "layer3_5_bn2_running_mean"}, {"Ptr": "layer3_5_bn2_running_var"}, {"Ptr": "layer3_5_bn2_weight"}, {"Ptr": "layer3_5_bn2_bias"}, {"Ptr": "var_242"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern042", [{"Ptr": "buf35"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "buf60"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf60"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern044", [{"Ptr": "buf35"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf61"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "buf60"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": 
{"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf60"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern046", [{"Ptr": "buf61"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "var_273"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf61"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern048", [{"Ptr": "buf60"}, {"Ptr": "buf67"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "var_283"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf60"}, {"Ptr": "layer4_2_conv1_weight"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": 
{"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern046", [{"Ptr": "buf67"}, {"Ptr": "layer4_2_bn1_running_mean"}, {"Ptr": "layer4_2_bn1_running_var"}, {"Ptr": "layer4_2_bn1_weight"}, {"Ptr": "layer4_2_bn1_bias"}, {"Ptr": "var_292"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern043", [{"Ptr": "buf67"}, {"Ptr": "layer4_2_conv2_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern052", [{"Ptr": "buf72"}, {"Ptr": "buf61"}, {"Ptr": "layer4_2_bn2_running_mean"}, {"Ptr": "layer4_2_bn2_running_var"}, {"Ptr": "layer4_2_bn2_weight"}, {"Ptr": "layer4_2_bn2_bias"}, {"Ptr": "buf60"}, {"Ptr": "var_303"}, {"Ptr": "var_304"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "fc_bias"}, {"Ptr": "buf72"}, {"Ptr": "fc_weight"}, {"Ptr": "buf74"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git 
a/machine_interface/tests/data/hip/test_gpu_resnet34batch8.json b/machine_interface/tests/data/hip/test_gpu_resnet34batch8.json new file mode 100644 index 00000000..6a730470 --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet34batch8.json @@ -0,0 +1,592 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_0.hsaco"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet34batch8/poi_fused_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet34batch8/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_8.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_8.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "path": "hip/resnet34batch8/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": 
"tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet34batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_17.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet34batch8/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet34batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco", "path": "hip/resnet34batch8/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco"}, + {"module_name": "tem_fused_linear_23.hsaco", "path": "hip/resnet34batch8/tem_fused_linear_23.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_0.hsaco", "kernel_name": "kern000"}, + {"module_name": "poi_fused_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": 
"kern002"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern005"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_conv2d_8.hsaco", "kernel_name": "kern014"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern015"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco", "kernel_name": "kern017"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern019"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern028"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern029"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern030"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern031"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern033"}, + {"module_name": "tem_fused_conv2d_17.hsaco", "kernel_name": "kern046"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern047"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern048"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern049"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern053"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco", "kernel_name": "kern057"}, + {"module_name": "tem_fused_linear_23.hsaco", "kernel_name": "kern058"} + ], + 
"blueprint": { + "inputs": ["arg218_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_8", "var_11", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "var_28", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "var_45", "layer1_2_conv1_weight", "layer1_2_bn1_running_mean", "layer1_2_bn1_running_var", "layer1_2_bn1_weight", "layer1_2_bn1_bias", "layer1_2_conv2_weight", "layer1_2_bn2_running_mean", "layer1_2_bn2_running_var", "layer1_2_bn2_weight", "layer1_2_bn2_bias", "var_62", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_downsample_0_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "var_86", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "var_95", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "var_105", "layer2_2_conv1_weight", "layer2_2_bn1_running_mean", "layer2_2_bn1_running_var", "layer2_2_bn1_weight", "layer2_2_bn1_bias", "var_114", "layer2_2_conv2_weight", "layer2_2_bn2_running_mean", "layer2_2_bn2_running_var", "layer2_2_bn2_weight", "layer2_2_bn2_bias", "var_124", "layer2_3_conv1_weight", "layer2_3_bn1_running_mean", "layer2_3_bn1_running_var", 
"layer2_3_bn1_weight", "layer2_3_bn1_bias", "var_133", "layer2_3_conv2_weight", "layer2_3_bn2_running_mean", "layer2_3_bn2_running_var", "layer2_3_bn2_weight", "layer2_3_bn2_bias", "var_143", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_downsample_0_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_conv1_weight", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_182", "layer3_2_conv1_weight", "layer3_2_bn1_running_mean", "layer3_2_bn1_running_var", "layer3_2_bn1_weight", "layer3_2_bn1_bias", "layer3_2_conv2_weight", "layer3_2_bn2_running_mean", "layer3_2_bn2_running_var", "layer3_2_bn2_weight", "layer3_2_bn2_bias", "var_199", "layer3_3_conv1_weight", "layer3_3_bn1_running_mean", "layer3_3_bn1_running_var", "layer3_3_bn1_weight", "layer3_3_bn1_bias", "layer3_3_conv2_weight", "layer3_3_bn2_running_mean", "layer3_3_bn2_running_var", "layer3_3_bn2_weight", "layer3_3_bn2_bias", "var_216", "layer3_4_conv1_weight", "layer3_4_bn1_running_mean", "layer3_4_bn1_running_var", "layer3_4_bn1_weight", "layer3_4_bn1_bias", "layer3_4_conv2_weight", "layer3_4_bn2_running_mean", "layer3_4_bn2_running_var", "layer3_4_bn2_weight", "layer3_4_bn2_bias", "var_233", "layer3_5_conv1_weight", "layer3_5_bn1_running_mean", "layer3_5_bn1_running_var", "layer3_5_bn1_weight", "layer3_5_bn1_bias", "layer3_5_conv2_weight", "layer3_5_bn2_running_mean", "layer3_5_bn2_running_var", "layer3_5_bn2_weight", "layer3_5_bn2_bias", "var_250", "layer4_0_conv1_weight", "layer4_0_bn1_running_mean", 
"layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "var_259", "layer4_0_conv2_weight", "layer4_0_downsample_0_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "layer4_1_conv1_weight", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "var_283", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "var_293", "layer4_2_conv1_weight", "layer4_2_bn1_running_mean", "layer4_2_bn1_running_var", "layer4_2_bn1_weight", "layer4_2_bn1_bias", "var_302", "layer4_2_conv2_weight", "layer4_2_bn2_running_mean", "layer4_2_bn2_running_var", "layer4_2_bn2_weight", "layer4_2_bn2_bias", "var_313", "var_314", "fc_bias", "fc_weight"], + "buffers": {"buf0": {"Absolute": 25690112}, "buf2": {"Absolute": 6422528}, "buf4": {"Absolute": 6422528}, "buf5": {"Absolute": 6422528}, "buf16": {"Absolute": 3211264}, "buf17": {"Absolute": 3211264}, "buf23": {"Absolute": 3211264}, "buf34": {"Absolute": 1605632}, "buf35": {"Absolute": 1605632}, "buf41": {"Absolute": 1605632}, "buf59": {"Absolute": 802816}, "buf61": {"Absolute": 802816}, "buf67": {"Absolute": 802816}, "buf72": {"Absolute": 16384}, "buf74": {"Absolute": 32000}}, + "outputs": ["buf74"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg218_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "buf0"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf0"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": 
"var_8"}], { + "grid_dim_x": {"Absolute": 6272}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf0"}, {"Ptr": "buf2"}, {"Ptr": "var_11"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": "buf2"}, {"Ptr": "buf5"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "var_28"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": 
"layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf5"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": "buf2"}, {"Ptr": "buf4"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "var_45"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_2_conv1_weight"}, {"Ptr": "layer1_2_bn1_running_mean"}, {"Ptr": "layer1_2_bn1_running_var"}, {"Ptr": "layer1_2_bn1_weight"}, {"Ptr": "layer1_2_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf4"}, {"Ptr": "layer1_2_conv2_weight"}, {"Ptr": "buf5"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + 
"block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": "buf2"}, {"Ptr": "buf5"}, {"Ptr": "layer1_2_bn2_running_mean"}, {"Ptr": "layer1_2_bn2_running_var"}, {"Ptr": "layer1_2_bn2_weight"}, {"Ptr": "layer1_2_bn2_bias"}, {"Ptr": "var_62"}], { + "grid_dim_x": {"Absolute": 3136}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf16"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "buf17"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf2"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf17"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf16"}, 
{"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "var_86"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf17"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf16"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "var_95"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf16"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf17"}, {"Ptr": "buf23"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "var_105"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf17"}, {"Ptr": "layer2_2_conv1_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf23"}, {"Ptr": "layer2_2_bn1_running_mean"}, {"Ptr": "layer2_2_bn1_running_var"}, {"Ptr": "layer2_2_bn1_weight"}, {"Ptr": "layer2_2_bn1_bias"}, {"Ptr": "var_114"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf23"}, {"Ptr": "layer2_2_conv2_weight"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf17"}, {"Ptr": "buf16"}, {"Ptr": "layer2_2_bn2_running_mean"}, {"Ptr": "layer2_2_bn2_running_var"}, {"Ptr": "layer2_2_bn2_weight"}, {"Ptr": "layer2_2_bn2_bias"}, {"Ptr": "var_124"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf17"}, {"Ptr": "layer2_3_conv1_weight"}, {"Ptr": "buf16"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf16"}, {"Ptr": "layer2_3_bn1_running_mean"}, {"Ptr": "layer2_3_bn1_running_var"}, {"Ptr": "layer2_3_bn1_weight"}, {"Ptr": "layer2_3_bn1_bias"}, {"Ptr": "var_133"}], { + "grid_dim_x": {"Absolute": 784}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf16"}, {"Ptr": "layer2_3_conv2_weight"}, {"Ptr": "buf23"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32768} + }]}, + {"ExecKernel": ["kern019", [{"Ptr": "buf17"}, {"Ptr": "buf23"}, {"Ptr": "layer2_3_bn2_running_mean"}, {"Ptr": "layer2_3_bn2_running_var"}, {"Ptr": "layer2_3_bn2_weight"}, {"Ptr": "layer2_3_bn2_bias"}, {"Ptr": "var_143"}], { + "grid_dim_x": {"Absolute": 1568}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf17"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf34"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf35"}], { + 
"grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern030", [{"Ptr": "buf17"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf35"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf34"}, {"Ptr": "layer3_1_conv1_weight"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf35"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": 
"var_182"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf34"}, {"Ptr": "layer3_2_conv1_weight"}, {"Ptr": "layer3_2_bn1_running_mean"}, {"Ptr": "layer3_2_bn1_running_var"}, {"Ptr": "layer3_2_bn1_weight"}, {"Ptr": "layer3_2_bn1_bias"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf41"}, {"Ptr": "layer3_2_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf34"}, {"Ptr": "buf35"}, {"Ptr": "layer3_2_bn2_running_mean"}, {"Ptr": "layer3_2_bn2_running_var"}, {"Ptr": "layer3_2_bn2_weight"}, {"Ptr": "layer3_2_bn2_bias"}, {"Ptr": "var_199"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf34"}, {"Ptr": "layer3_3_conv1_weight"}, {"Ptr": "layer3_3_bn1_running_mean"}, {"Ptr": "layer3_3_bn1_running_var"}, {"Ptr": "layer3_3_bn1_weight"}, {"Ptr": "layer3_3_bn1_bias"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, 
+ "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf35"}, {"Ptr": "layer3_3_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_3_bn2_running_mean"}, {"Ptr": "layer3_3_bn2_running_var"}, {"Ptr": "layer3_3_bn2_weight"}, {"Ptr": "layer3_3_bn2_bias"}, {"Ptr": "var_216"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf34"}, {"Ptr": "layer3_4_conv1_weight"}, {"Ptr": "layer3_4_bn1_running_mean"}, {"Ptr": "layer3_4_bn1_running_var"}, {"Ptr": "layer3_4_bn1_weight"}, {"Ptr": "layer3_4_bn1_bias"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf41"}, {"Ptr": "layer3_4_conv2_weight"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf34"}, {"Ptr": "buf35"}, {"Ptr": "layer3_4_bn2_running_mean"}, {"Ptr": "layer3_4_bn2_running_var"}, {"Ptr": "layer3_4_bn2_weight"}, {"Ptr": "layer3_4_bn2_bias"}, {"Ptr": "var_233"}], { + 
"grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf34"}, {"Ptr": "layer3_5_conv1_weight"}, {"Ptr": "layer3_5_bn1_running_mean"}, {"Ptr": "layer3_5_bn1_running_var"}, {"Ptr": "layer3_5_bn1_weight"}, {"Ptr": "layer3_5_bn1_bias"}, {"Ptr": "buf35"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf35"}, {"Ptr": "layer3_5_conv2_weight"}, {"Ptr": "buf41"}], { + "grid_dim_x": {"Absolute": 25}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf34"}, {"Ptr": "buf41"}, {"Ptr": "layer3_5_bn2_running_mean"}, {"Ptr": "layer3_5_bn2_running_var"}, {"Ptr": "layer3_5_bn2_weight"}, {"Ptr": "layer3_5_bn2_bias"}, {"Ptr": "var_250"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern046", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_conv1_weight"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern047", [{"Ptr": "buf59"}, {"Ptr": 
"layer4_0_bn1_running_mean"}, {"Ptr": "layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "var_259"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern048", [{"Ptr": "buf59"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern049", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf61"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern048", [{"Ptr": "buf59"}, {"Ptr": "layer4_1_conv1_weight"}, {"Ptr": "buf61"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern047", [{"Ptr": "buf61"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": "layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": 
"var_283"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern048", [{"Ptr": "buf61"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "buf59"}, {"Ptr": "buf67"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "var_293"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern048", [{"Ptr": "buf59"}, {"Ptr": "layer4_2_conv1_weight"}, {"Ptr": "buf67"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern047", [{"Ptr": "buf67"}, {"Ptr": "layer4_2_bn1_running_mean"}, {"Ptr": "layer4_2_bn1_running_var"}, {"Ptr": "layer4_2_bn1_weight"}, {"Ptr": "layer4_2_bn1_bias"}, {"Ptr": "var_302"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern048", [{"Ptr": "buf67"}, {"Ptr": "layer4_2_conv2_weight"}, {"Ptr": 
"buf61"}], { + "grid_dim_x": {"Absolute": 7}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern057", [{"Ptr": "buf72"}, {"Ptr": "buf61"}, {"Ptr": "layer4_2_bn2_running_mean"}, {"Ptr": "layer4_2_bn2_running_var"}, {"Ptr": "layer4_2_bn2_weight"}, {"Ptr": "layer4_2_bn2_bias"}, {"Ptr": "buf59"}, {"Ptr": "var_313"}, {"Ptr": "var_314"}], { + "grid_dim_x": {"Absolute": 512}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern058", [{"Ptr": "fc_bias"}, {"Ptr": "buf72"}, {"Ptr": "fc_weight"}, {"Ptr": "buf74"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/hip/test_gpu_resnet50.json b/machine_interface/tests/data/hip/test_gpu_resnet50.json new file mode 100644 index 00000000..b2fab18d --- /dev/null +++ b/machine_interface/tests/data/hip/test_gpu_resnet50.json @@ -0,0 +1,624 @@ +{ + "modules": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco"}, + {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "path": "hip/resnet50/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "path": 
"hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco", "path": "hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco", "path": "hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco"}, + {"module_name": "tem_fused_conv2d_10.hsaco", "path": "hip/resnet50/tem_fused_conv2d_10.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco", "path": "hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "path": "hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco", "path": 
"hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco", "path": "hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco", "path": "hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco", "path": "hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco"}, + {"module_name": "tem_fused_conv2d_24.hsaco", "path": "hip/resnet50/tem_fused_conv2d_24.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_25.hsaco", "path": "hip/resnet50/poi_fused_add_miopen_batch_norm_relu_25.hsaco"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco", "path": "hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco", "path": "hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_28.hsaco", "path": "hip/resnet50/poi_fused_add_miopen_batch_norm_relu_28.hsaco"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco", "path": "hip/resnet50/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco"}, + {"module_name": "tem_fused_linear_30.hsaco", "path": "hip/resnet50/tem_fused_linear_30.hsaco"} + ], + "kernels": [ + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco", "kernel_name": "kern000"}, 
+ {"module_name": "poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco", "kernel_name": "kern001"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco", "kernel_name": "kern002"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco", "kernel_name": "kern003"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco", "kernel_name": "kern004"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco", "kernel_name": "kern005"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco", "kernel_name": "kern006"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco", "kernel_name": "kern008"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco", "kernel_name": "kern012"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco", "kernel_name": "kern013"}, + {"module_name": "tem_fused_conv2d_10.hsaco", "kernel_name": "kern014"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco", "kernel_name": "kern015"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco", "kernel_name": "kern016"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco", "kernel_name": "kern017"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco", "kernel_name": "kern018"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco", "kernel_name": "kern025"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco", "kernel_name": "kern026"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco", "kernel_name": "kern027"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco", "kernel_name": "kern028"}, + {"module_name": "tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco", "kernel_name": "kern029"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco", "kernel_name": "kern031"}, + {"module_name": 
"tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco", "kernel_name": "kern033"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco", "kernel_name": "kern050"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco", "kernel_name": "kern051"}, + {"module_name": "tem_fused_conv2d_24.hsaco", "kernel_name": "kern052"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_25.hsaco", "kernel_name": "kern053"}, + {"module_name": "poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco", "kernel_name": "kern054"}, + {"module_name": "tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco", "kernel_name": "kern055"}, + {"module_name": "poi_fused_add_miopen_batch_norm_relu_28.hsaco", "kernel_name": "kern056"}, + {"module_name": "per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco", "kernel_name": "kern059"}, + {"module_name": "tem_fused_linear_30.hsaco", "kernel_name": "kern060"} + ], + "blueprint": { + "inputs": ["arg320_1", "conv1_weight", "bn1_running_mean", "bn1_running_var", "bn1_weight", "bn1_bias", "var_9", "layer1_0_conv1_weight", "layer1_0_bn1_running_mean", "layer1_0_bn1_running_var", "layer1_0_bn1_weight", "layer1_0_bn1_bias", "layer1_0_conv2_weight", "layer1_0_bn2_running_mean", "layer1_0_bn2_running_var", "layer1_0_bn2_weight", "layer1_0_bn2_bias", "layer1_0_conv3_weight", "layer1_0_downsample_0_weight", "layer1_0_bn3_running_mean", "layer1_0_bn3_running_var", "layer1_0_bn3_weight", "layer1_0_bn3_bias", "layer1_0_downsample_1_running_mean", "layer1_0_downsample_1_running_var", "layer1_0_downsample_1_weight", "layer1_0_downsample_1_bias", "layer1_1_conv1_weight", "layer1_1_bn1_running_mean", "layer1_1_bn1_running_var", "layer1_1_bn1_weight", "layer1_1_bn1_bias", "layer1_1_conv2_weight", "layer1_1_bn2_running_mean", "layer1_1_bn2_running_var", "layer1_1_bn2_weight", "layer1_1_bn2_bias", "layer1_1_conv3_weight", "layer1_1_bn3_running_mean", "layer1_1_bn3_running_var", "layer1_1_bn3_weight", "layer1_1_bn3_bias", 
"layer1_2_conv1_weight", "layer1_2_bn1_running_mean", "layer1_2_bn1_running_var", "layer1_2_bn1_weight", "layer1_2_bn1_bias", "layer1_2_conv2_weight", "layer1_2_bn2_running_mean", "layer1_2_bn2_running_var", "layer1_2_bn2_weight", "layer1_2_bn2_bias", "layer1_2_conv3_weight", "layer1_2_bn3_running_mean", "layer1_2_bn3_running_var", "layer1_2_bn3_weight", "layer1_2_bn3_bias", "layer2_0_conv1_weight", "layer2_0_bn1_running_mean", "layer2_0_bn1_running_var", "layer2_0_bn1_weight", "layer2_0_bn1_bias", "layer2_0_conv2_weight", "layer2_0_bn2_running_mean", "layer2_0_bn2_running_var", "layer2_0_bn2_weight", "layer2_0_bn2_bias", "layer2_0_downsample_0_weight", "layer2_0_conv3_weight", "layer2_0_bn3_running_mean", "layer2_0_bn3_running_var", "layer2_0_bn3_weight", "layer2_0_bn3_bias", "layer2_0_downsample_1_running_mean", "layer2_0_downsample_1_running_var", "layer2_0_downsample_1_weight", "layer2_0_downsample_1_bias", "layer2_1_conv1_weight", "layer2_1_bn1_running_mean", "layer2_1_bn1_running_var", "layer2_1_bn1_weight", "layer2_1_bn1_bias", "layer2_1_conv2_weight", "layer2_1_bn2_running_mean", "layer2_1_bn2_running_var", "layer2_1_bn2_weight", "layer2_1_bn2_bias", "layer2_1_conv3_weight", "layer2_1_bn3_running_mean", "layer2_1_bn3_running_var", "layer2_1_bn3_weight", "layer2_1_bn3_bias", "layer2_2_conv1_weight", "layer2_2_bn1_running_mean", "layer2_2_bn1_running_var", "layer2_2_bn1_weight", "layer2_2_bn1_bias", "layer2_2_conv2_weight", "layer2_2_bn2_running_mean", "layer2_2_bn2_running_var", "layer2_2_bn2_weight", "layer2_2_bn2_bias", "layer2_2_conv3_weight", "layer2_2_bn3_running_mean", "layer2_2_bn3_running_var", "layer2_2_bn3_weight", "layer2_2_bn3_bias", "layer2_3_conv1_weight", "layer2_3_bn1_running_mean", "layer2_3_bn1_running_var", "layer2_3_bn1_weight", "layer2_3_bn1_bias", "layer2_3_conv2_weight", "layer2_3_bn2_running_mean", "layer2_3_bn2_running_var", "layer2_3_bn2_weight", "layer2_3_bn2_bias", "layer2_3_conv3_weight", "layer2_3_bn3_running_mean", 
"layer2_3_bn3_running_var", "layer2_3_bn3_weight", "layer2_3_bn3_bias", "layer3_0_conv1_weight", "layer3_0_bn1_running_mean", "layer3_0_bn1_running_var", "layer3_0_bn1_weight", "layer3_0_bn1_bias", "layer3_0_conv2_weight", "layer3_0_bn2_running_mean", "layer3_0_bn2_running_var", "layer3_0_bn2_weight", "layer3_0_bn2_bias", "var_193", "layer3_0_conv3_weight", "layer3_0_downsample_0_weight", "layer3_0_bn3_running_mean", "layer3_0_bn3_running_var", "layer3_0_bn3_weight", "layer3_0_bn3_bias", "layer3_0_downsample_1_running_mean", "layer3_0_downsample_1_running_var", "layer3_0_downsample_1_weight", "layer3_0_downsample_1_bias", "layer3_1_bn1_running_mean", "layer3_1_bn1_running_var", "layer3_1_bn1_weight", "layer3_1_bn1_bias", "var_214", "layer3_1_conv2_weight", "layer3_1_bn2_running_mean", "layer3_1_bn2_running_var", "layer3_1_bn2_weight", "layer3_1_bn2_bias", "var_223", "layer3_1_conv3_weight", "layer3_1_bn3_running_mean", "layer3_1_bn3_running_var", "layer3_1_bn3_weight", "layer3_1_bn3_bias", "layer3_2_bn1_running_mean", "layer3_2_bn1_running_var", "layer3_2_bn1_weight", "layer3_2_bn1_bias", "var_237", "layer3_2_conv2_weight", "layer3_2_bn2_running_mean", "layer3_2_bn2_running_var", "layer3_2_bn2_weight", "layer3_2_bn2_bias", "var_246", "layer3_2_conv3_weight", "layer3_2_bn3_running_mean", "layer3_2_bn3_running_var", "layer3_2_bn3_weight", "layer3_2_bn3_bias", "layer3_3_bn1_running_mean", "layer3_3_bn1_running_var", "layer3_3_bn1_weight", "layer3_3_bn1_bias", "var_260", "layer3_3_conv2_weight", "layer3_3_bn2_running_mean", "layer3_3_bn2_running_var", "layer3_3_bn2_weight", "layer3_3_bn2_bias", "var_269", "layer3_3_conv3_weight", "layer3_3_bn3_running_mean", "layer3_3_bn3_running_var", "layer3_3_bn3_weight", "layer3_3_bn3_bias", "layer3_4_bn1_running_mean", "layer3_4_bn1_running_var", "layer3_4_bn1_weight", "layer3_4_bn1_bias", "var_283", "layer3_4_conv2_weight", "layer3_4_bn2_running_mean", "layer3_4_bn2_running_var", "layer3_4_bn2_weight", "layer3_4_bn2_bias", 
"var_292", "layer3_4_conv3_weight", "layer3_4_bn3_running_mean", "layer3_4_bn3_running_var", "layer3_4_bn3_weight", "layer3_4_bn3_bias", "layer3_5_bn1_running_mean", "layer3_5_bn1_running_var", "layer3_5_bn1_weight", "layer3_5_bn1_bias", "var_306", "layer3_5_conv2_weight", "layer3_5_bn2_running_mean", "layer3_5_bn2_running_var", "layer3_5_bn2_weight", "layer3_5_bn2_bias", "var_315", "layer3_5_conv3_weight", "layer3_5_bn3_running_mean", "layer3_5_bn3_running_var", "layer3_5_bn3_weight", "layer3_5_bn3_bias", "layer4_0_bn1_running_mean", "layer4_0_bn1_running_var", "layer4_0_bn1_weight", "layer4_0_bn1_bias", "var_329", "layer4_0_conv2_weight", "layer4_0_bn2_running_mean", "layer4_0_bn2_running_var", "layer4_0_bn2_weight", "layer4_0_bn2_bias", "layer4_0_downsample_0_weight", "layer4_0_bn3_running_mean", "layer4_0_bn3_running_var", "layer4_0_bn3_weight", "layer4_0_bn3_bias", "layer4_0_downsample_1_running_mean", "layer4_0_downsample_1_running_var", "layer4_0_downsample_1_weight", "layer4_0_downsample_1_bias", "var_350", "layer4_1_bn1_running_mean", "layer4_1_bn1_running_var", "layer4_1_bn1_weight", "layer4_1_bn1_bias", "var_356", "layer4_1_conv2_weight", "layer4_1_bn2_running_mean", "layer4_1_bn2_running_var", "layer4_1_bn2_weight", "layer4_1_bn2_bias", "layer4_1_bn3_running_mean", "layer4_1_bn3_running_var", "layer4_1_bn3_weight", "layer4_1_bn3_bias", "var_370", "layer4_2_bn1_running_mean", "layer4_2_bn1_running_var", "layer4_2_bn1_weight", "layer4_2_bn1_bias", "var_376", "layer4_2_conv2_weight", "layer4_2_bn2_running_mean", "layer4_2_bn2_running_var", "layer4_2_bn2_weight", "layer4_2_bn2_bias", "layer4_2_bn3_running_mean", "layer4_2_bn3_running_var", "layer4_2_bn3_weight", "layer4_2_bn3_bias", "var_391", "var_392", "fc_bias", "fc_weight"], + "buffers": {"buf1": {"Absolute": 3211264}, "buf2": {"Absolute": 802816}, "buf4": {"Absolute": 802816}, "buf6": {"Absolute": 802816}, "buf10": {"Absolute": 3211264}, "buf24": {"Absolute": 1605632}, "buf26": {"Absolute": 401408}, 
"buf30": {"Absolute": 1605632}, "buf34": {"Absolute": 401408}, "buf51": {"Absolute": 200704}, "buf59": {"Absolute": 200704}, "buf90": {"Absolute": 100352}, "buf98": {"Absolute": 100352}, "buf106": {"Absolute": 8192}, "buf108": {"Absolute": 4000}}, + "outputs": ["buf108"], + "control_flow": [ + {"ExecKernel": ["kern000", [{"Ptr": "arg320_1"}, {"Ptr": "conv1_weight"}, {"Ptr": "bn1_running_mean"}, {"Ptr": "bn1_running_var"}, {"Ptr": "bn1_weight"}, {"Ptr": "bn1_bias"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern001", [{"Ptr": "buf1"}, {"Ptr": "buf2"}, {"Ptr": "var_9"}], { + "grid_dim_x": {"Absolute": 392}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern002", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_conv1_weight"}, {"Ptr": "layer1_0_bn1_running_mean"}, {"Ptr": "layer1_0_bn1_running_var"}, {"Ptr": "layer1_0_bn1_weight"}, {"Ptr": "layer1_0_bn1_bias"}, {"Ptr": "buf4"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf4"}, {"Ptr": "layer1_0_conv2_weight"}, {"Ptr": "layer1_0_bn2_running_mean"}, {"Ptr": "layer1_0_bn2_running_var"}, {"Ptr": "layer1_0_bn2_weight"}, {"Ptr": "layer1_0_bn2_bias"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": 
{"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern004", [{"Ptr": "buf6"}, {"Ptr": "layer1_0_conv3_weight"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern005", [{"Ptr": "buf2"}, {"Ptr": "layer1_0_downsample_0_weight"}, {"Ptr": "buf1"}, {"Ptr": "layer1_0_bn3_running_mean"}, {"Ptr": "layer1_0_bn3_running_var"}, {"Ptr": "layer1_0_bn3_weight"}, {"Ptr": "layer1_0_bn3_bias"}, {"Ptr": "layer1_0_downsample_1_running_mean"}, {"Ptr": "layer1_0_downsample_1_running_var"}, {"Ptr": "layer1_0_downsample_1_weight"}, {"Ptr": "layer1_0_downsample_1_bias"}, {"Ptr": "buf10"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf10"}, {"Ptr": "layer1_1_conv1_weight"}, {"Ptr": "layer1_1_bn1_running_mean"}, {"Ptr": "layer1_1_bn1_running_var"}, {"Ptr": "layer1_1_bn1_weight"}, {"Ptr": "layer1_1_bn1_bias"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf2"}, {"Ptr": "layer1_1_conv2_weight"}, {"Ptr": "layer1_1_bn2_running_mean"}, {"Ptr": "layer1_1_bn2_running_var"}, {"Ptr": "layer1_1_bn2_weight"}, {"Ptr": "layer1_1_bn2_bias"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf6"}, {"Ptr": "layer1_1_conv3_weight"}, {"Ptr": "layer1_1_bn3_running_mean"}, {"Ptr": "layer1_1_bn3_running_var"}, {"Ptr": "layer1_1_bn3_weight"}, {"Ptr": "layer1_1_bn3_bias"}, {"Ptr": "buf10"}, {"Ptr": "buf1"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern006", [{"Ptr": "buf1"}, {"Ptr": "layer1_2_conv1_weight"}, {"Ptr": "layer1_2_bn1_running_mean"}, {"Ptr": "layer1_2_bn1_running_var"}, {"Ptr": "layer1_2_bn1_weight"}, {"Ptr": "layer1_2_bn1_bias"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern003", [{"Ptr": "buf6"}, {"Ptr": "layer1_2_conv2_weight"}, {"Ptr": "layer1_2_bn2_running_mean"}, {"Ptr": "layer1_2_bn2_running_var"}, {"Ptr": "layer1_2_bn2_weight"}, {"Ptr": "layer1_2_bn2_bias"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern008", [{"Ptr": "buf2"}, {"Ptr": "layer1_2_conv3_weight"}, {"Ptr": "layer1_2_bn3_running_mean"}, {"Ptr": "layer1_2_bn3_running_var"}, {"Ptr": "layer1_2_bn3_weight"}, {"Ptr": "layer1_2_bn3_bias"}, {"Ptr": "buf1"}, {"Ptr": "buf10"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": 
{"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern012", [{"Ptr": "buf10"}, {"Ptr": "layer2_0_conv1_weight"}, {"Ptr": "layer2_0_bn1_running_mean"}, {"Ptr": "layer2_0_bn1_running_var"}, {"Ptr": "layer2_0_bn1_weight"}, {"Ptr": "layer2_0_bn1_bias"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 49}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8704} + }]}, + {"ExecKernel": ["kern013", [{"Ptr": "buf24"}, {"Ptr": "layer2_0_conv2_weight"}, {"Ptr": "layer2_0_bn2_running_mean"}, {"Ptr": "layer2_0_bn2_running_var"}, {"Ptr": "layer2_0_bn2_weight"}, {"Ptr": "layer2_0_bn2_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern014", [{"Ptr": "buf10"}, {"Ptr": "layer2_0_downsample_0_weight"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern015", [{"Ptr": "buf26"}, {"Ptr": "layer2_0_conv3_weight"}, {"Ptr": "layer2_0_bn3_running_mean"}, {"Ptr": "layer2_0_bn3_running_var"}, {"Ptr": "layer2_0_bn3_weight"}, {"Ptr": "layer2_0_bn3_bias"}, {"Ptr": "buf24"}, {"Ptr": "layer2_0_downsample_1_running_mean"}, {"Ptr": "layer2_0_downsample_1_running_var"}, {"Ptr": "layer2_0_downsample_1_weight"}, {"Ptr": "layer2_0_downsample_1_bias"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf30"}, {"Ptr": "layer2_1_conv1_weight"}, {"Ptr": "layer2_1_bn1_running_mean"}, {"Ptr": "layer2_1_bn1_running_var"}, {"Ptr": "layer2_1_bn1_weight"}, {"Ptr": "layer2_1_bn1_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf26"}, {"Ptr": "layer2_1_conv2_weight"}, {"Ptr": "layer2_1_bn2_running_mean"}, {"Ptr": "layer2_1_bn2_running_var"}, {"Ptr": "layer2_1_bn2_weight"}, {"Ptr": "layer2_1_bn2_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer2_1_conv3_weight"}, {"Ptr": "layer2_1_bn3_running_mean"}, {"Ptr": "layer2_1_bn3_running_var"}, {"Ptr": "layer2_1_bn3_weight"}, {"Ptr": "layer2_1_bn3_bias"}, {"Ptr": "buf30"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf24"}, {"Ptr": "layer2_2_conv1_weight"}, {"Ptr": "layer2_2_bn1_running_mean"}, {"Ptr": "layer2_2_bn1_running_var"}, {"Ptr": "layer2_2_bn1_weight"}, {"Ptr": "layer2_2_bn1_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf34"}, {"Ptr": "layer2_2_conv2_weight"}, {"Ptr": "layer2_2_bn2_running_mean"}, {"Ptr": "layer2_2_bn2_running_var"}, {"Ptr": "layer2_2_bn2_weight"}, {"Ptr": "layer2_2_bn2_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf26"}, {"Ptr": "layer2_2_conv3_weight"}, {"Ptr": "layer2_2_bn3_running_mean"}, {"Ptr": "layer2_2_bn3_running_var"}, {"Ptr": "layer2_2_bn3_weight"}, {"Ptr": "layer2_2_bn3_bias"}, {"Ptr": "buf24"}, {"Ptr": "buf30"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern016", [{"Ptr": "buf30"}, {"Ptr": "layer2_3_conv1_weight"}, {"Ptr": "layer2_3_bn1_running_mean"}, {"Ptr": "layer2_3_bn1_running_var"}, {"Ptr": "layer2_3_bn1_weight"}, {"Ptr": "layer2_3_bn1_bias"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern017", [{"Ptr": "buf26"}, {"Ptr": "layer2_3_conv2_weight"}, {"Ptr": "layer2_3_bn2_running_mean"}, {"Ptr": "layer2_3_bn2_running_var"}, {"Ptr": "layer2_3_bn2_weight"}, {"Ptr": "layer2_3_bn2_bias"}, {"Ptr": "buf34"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 2}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": 
{"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern018", [{"Ptr": "buf34"}, {"Ptr": "layer2_3_conv3_weight"}, {"Ptr": "layer2_3_bn3_running_mean"}, {"Ptr": "layer2_3_bn3_running_var"}, {"Ptr": "layer2_3_bn3_weight"}, {"Ptr": "layer2_3_bn3_bias"}, {"Ptr": "buf30"}, {"Ptr": "buf24"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 8}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern025", [{"Ptr": "buf24"}, {"Ptr": "layer3_0_conv1_weight"}, {"Ptr": "layer3_0_bn1_running_mean"}, {"Ptr": "layer3_0_bn1_running_var"}, {"Ptr": "layer3_0_bn1_weight"}, {"Ptr": "layer3_0_bn1_bias"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 13}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern026", [{"Ptr": "buf2"}, {"Ptr": "layer3_0_conv2_weight"}, {"Ptr": "buf51"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf51"}, {"Ptr": "layer3_0_bn2_running_mean"}, {"Ptr": "layer3_0_bn2_running_var"}, {"Ptr": "layer3_0_bn2_weight"}, {"Ptr": "layer3_0_bn2_bias"}, {"Ptr": "var_193"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern028", [{"Ptr": "buf51"}, 
{"Ptr": "layer3_0_conv3_weight"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern029", [{"Ptr": "buf24"}, {"Ptr": "layer3_0_downsample_0_weight"}, {"Ptr": "buf2"}, {"Ptr": "layer3_0_bn3_running_mean"}, {"Ptr": "layer3_0_bn3_running_var"}, {"Ptr": "layer3_0_bn3_weight"}, {"Ptr": "layer3_0_bn3_bias"}, {"Ptr": "layer3_0_downsample_1_running_mean"}, {"Ptr": "layer3_0_downsample_1_running_var"}, {"Ptr": "layer3_0_downsample_1_weight"}, {"Ptr": "layer3_0_downsample_1_bias"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 17408} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf51"}, {"Ptr": "layer3_1_bn1_running_mean"}, {"Ptr": "layer3_1_bn1_running_var"}, {"Ptr": "layer3_1_bn1_weight"}, {"Ptr": "layer3_1_bn1_bias"}, {"Ptr": "var_214"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf51"}, {"Ptr": "layer3_1_conv2_weight"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf59"}, {"Ptr": "layer3_1_bn2_running_mean"}, {"Ptr": "layer3_1_bn2_running_var"}, {"Ptr": "layer3_1_bn2_weight"}, {"Ptr": "layer3_1_bn2_bias"}, {"Ptr": 
"var_223"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf59"}, {"Ptr": "layer3_1_conv3_weight"}, {"Ptr": "layer3_1_bn3_running_mean"}, {"Ptr": "layer3_1_bn3_running_var"}, {"Ptr": "layer3_1_bn3_weight"}, {"Ptr": "layer3_1_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf59"}, {"Ptr": "layer3_2_bn1_running_mean"}, {"Ptr": "layer3_2_bn1_running_var"}, {"Ptr": "layer3_2_bn1_weight"}, {"Ptr": "layer3_2_bn1_bias"}, {"Ptr": "var_237"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf59"}, {"Ptr": "layer3_2_conv2_weight"}, {"Ptr": "buf51"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf51"}, {"Ptr": "layer3_2_bn2_running_mean"}, {"Ptr": "layer3_2_bn2_running_var"}, {"Ptr": "layer3_2_bn2_weight"}, {"Ptr": "layer3_2_bn2_bias"}, {"Ptr": "var_246"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + 
"shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf51"}, {"Ptr": "layer3_2_conv3_weight"}, {"Ptr": "layer3_2_bn3_running_mean"}, {"Ptr": "layer3_2_bn3_running_var"}, {"Ptr": "layer3_2_bn3_weight"}, {"Ptr": "layer3_2_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf51"}, {"Ptr": "layer3_3_bn1_running_mean"}, {"Ptr": "layer3_3_bn1_running_var"}, {"Ptr": "layer3_3_bn1_weight"}, {"Ptr": "layer3_3_bn1_bias"}, {"Ptr": "var_260"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf51"}, {"Ptr": "layer3_3_conv2_weight"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf59"}, {"Ptr": "layer3_3_bn2_running_mean"}, {"Ptr": "layer3_3_bn2_running_var"}, {"Ptr": "layer3_3_bn2_weight"}, {"Ptr": "layer3_3_bn2_bias"}, {"Ptr": "var_269"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf59"}, {"Ptr": "layer3_3_conv3_weight"}, {"Ptr": "layer3_3_bn3_running_mean"}, {"Ptr": "layer3_3_bn3_running_var"}, {"Ptr": 
"layer3_3_bn3_weight"}, {"Ptr": "layer3_3_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf59"}, {"Ptr": "layer3_4_bn1_running_mean"}, {"Ptr": "layer3_4_bn1_running_var"}, {"Ptr": "layer3_4_bn1_weight"}, {"Ptr": "layer3_4_bn1_bias"}, {"Ptr": "var_283"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf59"}, {"Ptr": "layer3_4_conv2_weight"}, {"Ptr": "buf51"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf51"}, {"Ptr": "layer3_4_bn2_running_mean"}, {"Ptr": "layer3_4_bn2_running_var"}, {"Ptr": "layer3_4_bn2_weight"}, {"Ptr": "layer3_4_bn2_bias"}, {"Ptr": "var_292"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf51"}, {"Ptr": "layer3_4_conv3_weight"}, {"Ptr": "layer3_4_bn3_running_mean"}, {"Ptr": "layer3_4_bn3_running_var"}, {"Ptr": "layer3_4_bn3_weight"}, {"Ptr": "layer3_4_bn3_bias"}, {"Ptr": "buf2"}, {"Ptr": "buf6"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 
256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf51"}, {"Ptr": "layer3_5_bn1_running_mean"}, {"Ptr": "layer3_5_bn1_running_var"}, {"Ptr": "layer3_5_bn1_weight"}, {"Ptr": "layer3_5_bn1_bias"}, {"Ptr": "var_306"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern031", [{"Ptr": "buf51"}, {"Ptr": "layer3_5_conv2_weight"}, {"Ptr": "buf59"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 4}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern027", [{"Ptr": "buf59"}, {"Ptr": "layer3_5_bn2_running_mean"}, {"Ptr": "layer3_5_bn2_running_var"}, {"Ptr": "layer3_5_bn2_weight"}, {"Ptr": "layer3_5_bn2_bias"}, {"Ptr": "var_315"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern033", [{"Ptr": "buf59"}, {"Ptr": "layer3_5_conv3_weight"}, {"Ptr": "layer3_5_bn3_running_mean"}, {"Ptr": "layer3_5_bn3_running_var"}, {"Ptr": "layer3_5_bn3_weight"}, {"Ptr": "layer3_5_bn3_bias"}, {"Ptr": "buf6"}, {"Ptr": "buf2"}], { + "grid_dim_x": {"Absolute": 4}, + "grid_dim_y": {"Absolute": 16}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16640} + }]}, + {"ExecKernel": ["kern050", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_bn1_running_mean"}, {"Ptr": 
"layer4_0_bn1_running_var"}, {"Ptr": "layer4_0_bn1_weight"}, {"Ptr": "layer4_0_bn1_bias"}, {"Ptr": "var_329"}], { + "grid_dim_x": {"Absolute": 98}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern051", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_conv2_weight"}, {"Ptr": "layer4_0_bn2_running_mean"}, {"Ptr": "layer4_0_bn2_running_var"}, {"Ptr": "layer4_0_bn2_weight"}, {"Ptr": "layer4_0_bn2_bias"}, {"Ptr": "buf90"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 512}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 16384} + }]}, + {"ExecKernel": ["kern052", [{"Ptr": "buf2"}, {"Ptr": "layer4_0_downsample_0_weight"}, {"Ptr": "buf26"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 8192} + }]}, + {"ExecKernel": ["kern053", [{"Ptr": "buf34"}, {"Ptr": "layer4_0_bn3_running_mean"}, {"Ptr": "layer4_0_bn3_running_var"}, {"Ptr": "layer4_0_bn3_weight"}, {"Ptr": "layer4_0_bn3_bias"}, {"Ptr": "buf26"}, {"Ptr": "layer4_0_downsample_1_running_mean"}, {"Ptr": "layer4_0_downsample_1_running_var"}, {"Ptr": "layer4_0_downsample_1_weight"}, {"Ptr": "layer4_0_downsample_1_bias"}, {"Ptr": "var_350"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern054", [{"Ptr": "buf90"}, {"Ptr": "layer4_1_bn1_running_mean"}, {"Ptr": "layer4_1_bn1_running_var"}, {"Ptr": 
"layer4_1_bn1_weight"}, {"Ptr": "layer4_1_bn1_bias"}, {"Ptr": "var_356"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern055", [{"Ptr": "buf90"}, {"Ptr": "layer4_1_conv2_weight"}, {"Ptr": "layer4_1_bn2_running_mean"}, {"Ptr": "layer4_1_bn2_running_var"}, {"Ptr": "layer4_1_bn2_weight"}, {"Ptr": "layer4_1_bn2_bias"}, {"Ptr": "buf98"}], { + "grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern056", [{"Ptr": "buf34"}, {"Ptr": "buf26"}, {"Ptr": "layer4_1_bn3_running_mean"}, {"Ptr": "layer4_1_bn3_running_var"}, {"Ptr": "layer4_1_bn3_weight"}, {"Ptr": "layer4_1_bn3_bias"}, {"Ptr": "var_370"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern054", [{"Ptr": "buf98"}, {"Ptr": "layer4_2_bn1_running_mean"}, {"Ptr": "layer4_2_bn1_running_var"}, {"Ptr": "layer4_2_bn1_weight"}, {"Ptr": "layer4_2_bn1_bias"}, {"Ptr": "var_376"}], { + "grid_dim_x": {"Absolute": 196}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 0} + }]}, + {"ExecKernel": ["kern055", [{"Ptr": "buf98"}, {"Ptr": "layer4_2_conv2_weight"}, {"Ptr": "layer4_2_bn2_running_mean"}, {"Ptr": "layer4_2_bn2_running_var"}, {"Ptr": "layer4_2_bn2_weight"}, {"Ptr": "layer4_2_bn2_bias"}, {"Ptr": "buf90"}], { + 
"grid_dim_x": {"Absolute": 1}, + "grid_dim_y": {"Absolute": 32}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 5120} + }]}, + {"ExecKernel": ["kern059", [{"Ptr": "buf106"}, {"Ptr": "buf26"}, {"Ptr": "layer4_2_bn3_running_mean"}, {"Ptr": "layer4_2_bn3_running_var"}, {"Ptr": "layer4_2_bn3_weight"}, {"Ptr": "layer4_2_bn3_bias"}, {"Ptr": "buf34"}, {"Ptr": "var_391"}, {"Ptr": "var_392"}], { + "grid_dim_x": {"Absolute": 256}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 256}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 32} + }]}, + {"ExecKernel": ["kern060", [{"Ptr": "fc_bias"}, {"Ptr": "buf106"}, {"Ptr": "fc_weight"}, {"Ptr": "buf108"}], { + "grid_dim_x": {"Absolute": 32}, + "grid_dim_y": {"Absolute": 1}, + "grid_dim_z": {"Absolute": 1}, + "block_dim_x": {"Absolute": 128}, + "block_dim_y": {"Absolute": 1}, + "block_dim_z": {"Absolute": 1}, + "shared_mem_bytes": {"Absolute": 24576} + }]} + ] + } +} \ No newline at end of file diff --git a/machine_interface/tests/data/test_elf_mmu_x86_64_inference b/machine_interface/tests/data/test_elf_mmu_x86_64_inference new file mode 100755 index 00000000..7bdd7e9a Binary files /dev/null and b/machine_interface/tests/data/test_elf_mmu_x86_64_inference differ diff --git a/machine_interface/tests/data/test_elf_mmu_x86_64_inference_static b/machine_interface/tests/data/test_elf_mmu_x86_64_inference_static new file mode 100755 index 00000000..42f30eeb Binary files /dev/null and b/machine_interface/tests/data/test_elf_mmu_x86_64_inference_static differ diff --git a/machine_interface/tests/libs/cuda/bert.cubin b/machine_interface/tests/libs/cuda/bert.cubin new file mode 100644 index 00000000..9baeed23 Binary files /dev/null and b/machine_interface/tests/libs/cuda/bert.cubin differ diff --git 
a/machine_interface/tests/libs/cuda/double_matmul.cubin b/machine_interface/tests/libs/cuda/double_matmul.cubin new file mode 100644 index 00000000..fb43fddf Binary files /dev/null and b/machine_interface/tests/libs/cuda/double_matmul.cubin differ diff --git a/machine_interface/tests/libs/cuda/llama.cubin b/machine_interface/tests/libs/cuda/llama.cubin new file mode 100644 index 00000000..1fe6d3db Binary files /dev/null and b/machine_interface/tests/libs/cuda/llama.cubin differ diff --git a/machine_interface/tests/libs/cuda/llama_kv.cubin b/machine_interface/tests/libs/cuda/llama_kv.cubin new file mode 100644 index 00000000..b4f1511c Binary files /dev/null and b/machine_interface/tests/libs/cuda/llama_kv.cubin differ diff --git a/machine_interface/tests/libs/cuda/llm.cubin b/machine_interface/tests/libs/cuda/llm.cubin new file mode 100644 index 00000000..e00807aa Binary files /dev/null and b/machine_interface/tests/libs/cuda/llm.cubin differ diff --git a/machine_interface/tests/libs/cuda/llm_inference.cubin b/machine_interface/tests/libs/cuda/llm_inference.cubin new file mode 100644 index 00000000..1a2ac67c Binary files /dev/null and b/machine_interface/tests/libs/cuda/llm_inference.cubin differ diff --git a/machine_interface/tests/libs/cuda/lstm.cubin b/machine_interface/tests/libs/cuda/lstm.cubin new file mode 100644 index 00000000..553ce34f Binary files /dev/null and b/machine_interface/tests/libs/cuda/lstm.cubin differ diff --git a/machine_interface/tests/libs/cuda/mobilenetv2.cubin b/machine_interface/tests/libs/cuda/mobilenetv2.cubin new file mode 100644 index 00000000..b08aad58 Binary files /dev/null and b/machine_interface/tests/libs/cuda/mobilenetv2.cubin differ diff --git a/machine_interface/tests/libs/cuda/resnet101.cubin b/machine_interface/tests/libs/cuda/resnet101.cubin new file mode 100644 index 00000000..1d2d835e Binary files /dev/null and b/machine_interface/tests/libs/cuda/resnet101.cubin differ diff --git 
a/machine_interface/tests/libs/cuda/resnet152.cubin b/machine_interface/tests/libs/cuda/resnet152.cubin new file mode 100644 index 00000000..7725f91e Binary files /dev/null and b/machine_interface/tests/libs/cuda/resnet152.cubin differ diff --git a/machine_interface/tests/libs/cuda/resnet18.cubin b/machine_interface/tests/libs/cuda/resnet18.cubin new file mode 100644 index 00000000..98d08386 Binary files /dev/null and b/machine_interface/tests/libs/cuda/resnet18.cubin differ diff --git a/machine_interface/tests/libs/cuda/resnet34.cubin b/machine_interface/tests/libs/cuda/resnet34.cubin new file mode 100644 index 00000000..90617312 Binary files /dev/null and b/machine_interface/tests/libs/cuda/resnet34.cubin differ diff --git a/machine_interface/tests/libs/cuda/resnet50.cubin b/machine_interface/tests/libs/cuda/resnet50.cubin new file mode 100644 index 00000000..d9d0628e Binary files /dev/null and b/machine_interface/tests/libs/cuda/resnet50.cubin differ diff --git a/machine_interface/tests/libs/cuda/rnn.cubin b/machine_interface/tests/libs/cuda/rnn.cubin new file mode 100644 index 00000000..e0ba28e6 Binary files /dev/null and b/machine_interface/tests/libs/cuda/rnn.cubin differ diff --git a/machine_interface/tests/libs/cuda/simple.cubin b/machine_interface/tests/libs/cuda/simple.cubin new file mode 100644 index 00000000..abd5cea6 Binary files /dev/null and b/machine_interface/tests/libs/cuda/simple.cubin differ diff --git a/machine_interface/tests/libs/cuda/test.cubin b/machine_interface/tests/libs/cuda/test.cubin new file mode 100644 index 00000000..9e7a200f Binary files /dev/null and b/machine_interface/tests/libs/cuda/test.cubin differ diff --git a/machine_interface/tests/libs/cuda/vit_b_16.cubin b/machine_interface/tests/libs/cuda/vit_b_16.cubin new file mode 100644 index 00000000..0d9c03f0 Binary files /dev/null and b/machine_interface/tests/libs/cuda/vit_b_16.cubin differ diff --git 
a/machine_interface/tests/libs/hip/alexnet/poi_fused_adaptive_avg_pool2d_conv2d_max_pool2d_relu_7.hsaco b/machine_interface/tests/libs/hip/alexnet/poi_fused_adaptive_avg_pool2d_conv2d_max_pool2d_relu_7.hsaco new file mode 100644 index 00000000..0ebb2506 Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/poi_fused_adaptive_avg_pool2d_conv2d_max_pool2d_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/poi_fused_conv2d_max_pool2d_relu_1.hsaco b/machine_interface/tests/libs/hip/alexnet/poi_fused_conv2d_max_pool2d_relu_1.hsaco new file mode 100644 index 00000000..751a5ea1 Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/poi_fused_conv2d_max_pool2d_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/poi_fused_conv2d_max_pool2d_relu_3.hsaco b/machine_interface/tests/libs/hip/alexnet/poi_fused_conv2d_max_pool2d_relu_3.hsaco new file mode 100644 index 00000000..1773b864 Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/poi_fused_conv2d_max_pool2d_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_0.hsaco b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_0.hsaco new file mode 100644 index 00000000..9c169f65 Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_2.hsaco b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_2.hsaco new file mode 100644 index 00000000..208d0704 Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_4.hsaco b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_4.hsaco new file mode 100644 index 00000000..2c419179 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_5.hsaco b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_5.hsaco new file mode 100644 index 00000000..7e5b169a Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_6.hsaco b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_6.hsaco new file mode 100644 index 00000000..e9af7c7e Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/tem_fused_conv2d_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/tem_fused_linear_relu_10.hsaco b/machine_interface/tests/libs/hip/alexnet/tem_fused_linear_relu_10.hsaco new file mode 100644 index 00000000..b4584e05 Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/tem_fused_linear_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/tem_fused_relu_8.hsaco b/machine_interface/tests/libs/hip/alexnet/tem_fused_relu_8.hsaco new file mode 100644 index 00000000..4037e1c4 Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/tem_fused_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/alexnet/tem_fused_relu_9.hsaco b/machine_interface/tests/libs/hip/alexnet/tem_fused_relu_9.hsaco new file mode 100644 index 00000000..d5a3ffce Binary files /dev/null and b/machine_interface/tests/libs/hip/alexnet/tem_fused_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/batch_norm/poi_fused_2.hsaco b/machine_interface/tests/libs/hip/batch_norm/poi_fused_2.hsaco new file mode 100644 index 00000000..393494bf Binary files /dev/null and b/machine_interface/tests/libs/hip/batch_norm/poi_fused_2.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/batch_norm/poi_fused_avg_pool2d_conv2d_max_pool2d_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/batch_norm/poi_fused_avg_pool2d_conv2d_max_pool2d_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..ad19e418 Binary files /dev/null and b/machine_interface/tests/libs/hip/batch_norm/poi_fused_avg_pool2d_conv2d_max_pool2d_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/batch_norm/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco b/machine_interface/tests/libs/hip/batch_norm/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco new file mode 100644 index 00000000..e5a04b18 Binary files /dev/null and b/machine_interface/tests/libs/hip/batch_norm/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/batch_norm/tem_fused_linear_3.hsaco b/machine_interface/tests/libs/hip/batch_norm/tem_fused_linear_3.hsaco new file mode 100644 index 00000000..2a8e7ebc Binary files /dev/null and b/machine_interface/tests/libs/hip/batch_norm/tem_fused_linear_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/double_matmul/tem_fused_linear_0.hsaco b/machine_interface/tests/libs/hip/double_matmul/tem_fused_linear_0.hsaco new file mode 100644 index 00000000..76079ab7 Binary files /dev/null and b/machine_interface/tests/libs/hip/double_matmul/tem_fused_linear_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/double_matmul/tem_fused_linear_1.hsaco b/machine_interface/tests/libs/hip/double_matmul/tem_fused_linear_1.hsaco new file mode 100644 index 00000000..cf6fc64c Binary files /dev/null and b/machine_interface/tests/libs/hip/double_matmul/tem_fused_linear_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_0.hsaco new file mode 100644 index 00000000..ec75ebad Binary files /dev/null and 
b/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_max_pool2d_relu_2.hsaco b/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_max_pool2d_relu_2.hsaco new file mode 100644 index 00000000..ddcfe39a Binary files /dev/null and b/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_max_pool2d_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_max_pool2d_relu_4.hsaco b/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_max_pool2d_relu_4.hsaco new file mode 100644 index 00000000..41f5b19e Binary files /dev/null and b/machine_interface/tests/libs/hip/lenet5/poi_fused_conv2d_max_pool2d_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/lenet5/tem_fused_conv2d_max_pool2d_relu_3.hsaco b/machine_interface/tests/libs/hip/lenet5/tem_fused_conv2d_max_pool2d_relu_3.hsaco new file mode 100644 index 00000000..ba3a8f6b Binary files /dev/null and b/machine_interface/tests/libs/hip/lenet5/tem_fused_conv2d_max_pool2d_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/lenet5/tem_fused_conv2d_relu_1.hsaco b/machine_interface/tests/libs/hip/lenet5/tem_fused_conv2d_relu_1.hsaco new file mode 100644 index 00000000..66c72bc1 Binary files /dev/null and b/machine_interface/tests/libs/hip/lenet5/tem_fused_conv2d_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/lenet5/tem_fused_linear_relu_7.hsaco b/machine_interface/tests/libs/hip/lenet5/tem_fused_linear_relu_7.hsaco new file mode 100644 index 00000000..31cdbfaa Binary files /dev/null and b/machine_interface/tests/libs/hip/lenet5/tem_fused_linear_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/lenet5/tem_fused_relu_5.hsaco b/machine_interface/tests/libs/hip/lenet5/tem_fused_relu_5.hsaco new file mode 100644 index 00000000..93c0e4f0 Binary files /dev/null and b/machine_interface/tests/libs/hip/lenet5/tem_fused_relu_5.hsaco differ diff 
--git a/machine_interface/tests/libs/hip/lenet5/tem_fused_relu_6.hsaco b/machine_interface/tests/libs/hip/lenet5/tem_fused_relu_6.hsaco new file mode 100644 index 00000000..01ed37e4 Binary files /dev/null and b/machine_interface/tests/libs/hip/lenet5/tem_fused_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco b/machine_interface/tests/libs/hip/resnet152/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco new file mode 100644 index 00000000..a2707a4b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/poi_fused_add_miopen_batch_norm_relu_25.hsaco b/machine_interface/tests/libs/hip/resnet152/poi_fused_add_miopen_batch_norm_relu_25.hsaco new file mode 100644 index 00000000..6294e18e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/poi_fused_add_miopen_batch_norm_relu_25.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/poi_fused_add_miopen_batch_norm_relu_28.hsaco b/machine_interface/tests/libs/hip/resnet152/poi_fused_add_miopen_batch_norm_relu_28.hsaco new file mode 100644 index 00000000..97ac6c06 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/poi_fused_add_miopen_batch_norm_relu_28.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..5e1c4bd2 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco 
b/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco new file mode 100644 index 00000000..94ca4f79 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco b/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco new file mode 100644 index 00000000..d6f90867 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet152/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..28fe9062 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..f8d997d4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..31c1c0bf Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco 
new file mode 100644 index 00000000..62873370 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..dad0b154 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..61e76d26 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..616da271 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_24.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_24.hsaco new file mode 100644 index 00000000..2dc718f4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_24.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco new file mode 100644 index 00000000..add02474 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco 
differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..c9c438bf Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..ff0b37a4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..a15f21bd Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..5b655c86 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..b233ac00 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco 
b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..00fb6c33 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..0a73e9ab Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..8ba2182d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco new file mode 100644 index 00000000..1f4b8cc3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco new file mode 100644 index 00000000..eae0583e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..b68512f6 
Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..c934ee81 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..210a8bdd Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..5b2d8a3b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..d2d871af Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet152/tem_fused_linear_30.hsaco b/machine_interface/tests/libs/hip/resnet152/tem_fused_linear_30.hsaco new file mode 100644 index 00000000..876a8e37 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet152/tem_fused_linear_30.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..596c3389 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/poi_fused_add_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18/poi_fused_add_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..d687380f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/poi_fused_add_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/poi_fused_add_miopen_batch_norm_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet18/poi_fused_add_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..93ba5671 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/poi_fused_add_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet18/poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..5e902ee1 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..28fe9062 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco 
b/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..0511b687 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..ffc58225 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..ced034dc Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..7957bf0c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_16.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_16.hsaco new file mode 100644 index 00000000..6df4a82e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco new file mode 100644 index 00000000..add02474 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..f17a8f8f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..ad478129 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..2b80fb4e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..9be253cd Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..709c7378 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..78bb7d46 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..2ab95f56 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..b0ee00ef Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..f384531d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18/tem_fused_linear_20.hsaco b/machine_interface/tests/libs/hip/resnet18/tem_fused_linear_20.hsaco new file mode 100644 index 00000000..5e05b806 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18/tem_fused_linear_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco b/machine_interface/tests/libs/hip/resnet18batch/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco 
new file mode 100644 index 00000000..276f7040 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..65caed13 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..f2d4488c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..89c8e8bd Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..003d8f4a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..f6259a5e Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_add_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..04fb4a1d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..dfa5df17 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..a31a4d37 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..68582246 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..b32496ac Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..df89c559 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_17.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_17.hsaco new file mode 100644 index 00000000..5d0f5c06 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_20.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_20.hsaco new file mode 100644 index 00000000..2c89af1d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_8.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_8.hsaco new file mode 100644 index 00000000..6234886a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..f6bdfb3f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 
00000000..fb0f87ac Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..49f40ce3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..93054ae3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..db3a74cf Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..e88836e8 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..aa2f2694 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..61530eb0 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch/tem_fused_linear_23.hsaco b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_linear_23.hsaco new file mode 100644 index 00000000..0ecd9869 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch/tem_fused_linear_23.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..2dccc90b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..ff8e7ca1 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..c819e012 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..368ac012 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..c197ca0b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_add_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..e5a82122 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..a9f79f53 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..c6bb3f2b Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..e2fa21b1 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..31199cde Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..63785490 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..53d706ae Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..22d870c9 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_0.hsaco differ 
diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_12.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_12.hsaco new file mode 100644 index 00000000..84834da6 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_17.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_17.hsaco new file mode 100644 index 00000000..721c8926 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_3.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_3.hsaco new file mode 100644 index 00000000..02ec5d8a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_8.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_8.hsaco new file mode 100644 index 00000000..8fd53694 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..38aa8887 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..0a204073 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..fc51301f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..0598387c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..596eb3cc Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_linear_22.hsaco b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_linear_22.hsaco new file mode 100644 index 00000000..377ee204 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch16/tem_fused_linear_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..6f989bd3 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch2/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..5e0cbf5b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..577dae10 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..aeaa5013 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..2b61b592 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_add_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..e18a25a1 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..dbc9924c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..8323cbad Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..ef7ee5f5 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_17.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_17.hsaco new file mode 100644 index 00000000..b829c3ee Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_6.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_6.hsaco new file mode 100644 index 00000000..975ed8ae Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_6.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco new file mode 100644 index 00000000..3bd91139 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..61a2b805 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..efb5d823 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..1ae1c626 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..76c6514a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..f327f9ec Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..9cf5af67 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..641daff6 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..d4e46150 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..f75b895f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_linear_21.hsaco b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_linear_21.hsaco new file mode 100644 index 00000000..46653d74 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch2/tem_fused_linear_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco new file mode 100644 index 00000000..b8d0c6e9 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..145efa94 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..79ac9cc6 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..1ccbc3eb Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_15.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..e1490342 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..af103064 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_add_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..45b8dbcb Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..db9e11bd Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..4527d895 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..75d27349 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..8335f8f2 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..dd6fe9c1 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_14.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_14.hsaco new file mode 100644 index 00000000..aeac8a70 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_3.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_3.hsaco new file mode 100644 index 00000000..38a9baf9 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_6.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_6.hsaco new file mode 100644 index 00000000..ceb099c6 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_9.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_9.hsaco new file mode 100644 index 00000000..25ca9871 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..371315cf Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..45abcb70 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..621b6a34 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..a8312acf Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..00152c5b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..bb0d03e7 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..28d64b61 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_linear_23.hsaco b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_linear_23.hsaco new file mode 100644 index 00000000..d89f9e5a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch32/tem_fused_linear_23.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..bc31aff9 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_19.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_add_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_add_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..d56f1f80 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_add_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..6de1c53d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..2e1a0f91 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..633a13da Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..849f6d76 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..221de1be Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..14974a04 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..76c24fc5 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..f6406a2b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..c9911e83 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_6.hsaco 
b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_6.hsaco new file mode 100644 index 00000000..8f6c0c37 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..78a91bd9 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..be37eb4b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..6646d128 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..a426024f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco new file 
mode 100644 index 00000000..81a4c6e0 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..4e5922e3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..1918d2e0 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..3d3d094f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_linear_20.hsaco b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_linear_20.hsaco new file mode 100644 index 00000000..658bd78e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch4/tem_fused_linear_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco new file mode 100644 index 00000000..df1dfdb3 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch64/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..4f241fd8 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..1cee452c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..7e948130 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..a45280b3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..1dbc43c4 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..4502efb1 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_add_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..6c415045 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..80a127be Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..713e5fc2 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..b816100d Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..ae7d8db0 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..d000cbf2 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_12.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_12.hsaco new file mode 100644 index 00000000..d0083da8 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_15.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_15.hsaco new file mode 100644 index 00000000..f83b2032 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_20.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_20.hsaco new file mode 100644 index 00000000..e7635b1f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_3.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_3.hsaco new file mode 100644 index 00000000..f857bd39 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_6.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_6.hsaco new file mode 100644 index 00000000..eea20b9e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_9.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_9.hsaco new file mode 100644 index 00000000..7c70e63e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..a672a2fd Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..298a35d4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..7acc7448 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_22.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_22.hsaco new file mode 100644 index 00000000..ff8d8037 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..c3af367b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_linear_24.hsaco b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_linear_24.hsaco new file mode 100644 index 00000000..8789fa6a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch64/tem_fused_linear_24.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..93a0db9c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..117deef7 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_11.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..215a0129 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..ff789b7d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..cb7b0e43 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_add_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..07374832 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..f461efa9 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..95361743 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..314e9f2e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..f60ba9fe Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..61ae28a2 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..a1b45a4e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_17.hsaco 
b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_17.hsaco new file mode 100644 index 00000000..174bf361 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_8.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_8.hsaco new file mode 100644 index 00000000..c129dd20 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..05f32d4a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..fe6f05c9 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..8d0cf46a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..6c1a6e6d Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..7a806045 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..a3bde144 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..99031b7b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..e0802667 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_linear_22.hsaco b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_linear_22.hsaco new file mode 100644 index 00000000..e16dd547 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18batch8/tem_fused_linear_22.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet18onnx/per_fused_add_conv2d_mean_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/per_fused_add_conv2d_mean_relu_19.hsaco new file mode 100644 index 00000000..bcbe4b17 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/per_fused_add_conv2d_mean_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_add_conv2d_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_add_conv2d_relu_13.hsaco new file mode 100644 index 00000000..45a2366b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_add_conv2d_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_add_conv2d_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_add_conv2d_relu_17.hsaco new file mode 100644 index 00000000..ca5dc72c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_add_conv2d_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_conv2d_max_pool2d_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_conv2d_max_pool2d_relu_1.hsaco new file mode 100644 index 00000000..1046a158 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/poi_fused_conv2d_max_pool2d_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_11.hsaco new file mode 100644 index 00000000..b4baa5af Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_3.hsaco new file mode 100644 index 00000000..5ede0b2c Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_6.hsaco new file mode 100644 index 00000000..c5a428d5 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_8.hsaco new file mode 100644 index 00000000..683b260c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_add_conv2d_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_16.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_16.hsaco new file mode 100644 index 00000000..7683a46d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_0.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_0.hsaco new file mode 100644 index 00000000..98214265 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_10.hsaco new file mode 100644 index 00000000..89883498 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_12.hsaco new file mode 100644 index 00000000..c213c46a Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_14.hsaco new file mode 100644 index 00000000..eafb2172 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_15.hsaco new file mode 100644 index 00000000..03026b96 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_18.hsaco new file mode 100644 index 00000000..58155dc0 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_2.hsaco new file mode 100644 index 00000000..29fce037 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_4.hsaco new file mode 100644 index 00000000..9e907169 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_5.hsaco new file mode 100644 index 00000000..e823d193 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_7.hsaco new file mode 100644 index 00000000..096aa628 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_9.hsaco new file mode 100644 index 00000000..c1dda621 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_conv2d_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_linear_20.hsaco b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_linear_20.hsaco new file mode 100644 index 00000000..9127e21c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet18onnx/tem_fused_linear_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet34/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..f0f11615 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..f4c91830 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_17.hsaco 
b/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..34bd4b41 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..0e910b44 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/poi_fused_add_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet34/poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..aa85f0c5 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/poi_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet34/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..28fe9062 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..b3f37b79 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 
00000000..ffc58225 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..18bb41d7 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..22f19eea Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_add_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_16.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_16.hsaco new file mode 100644 index 00000000..c3d1b744 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco new file mode 100644 index 00000000..add02474 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..32da251c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..f9adf350 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..3cbfeb5d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..05617468 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..709c7378 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..70aa8ef2 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco 
b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..0beb7b89 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..bb39cd2f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..8b5ae98e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34/tem_fused_linear_21.hsaco b/machine_interface/tests/libs/hip/resnet34/tem_fused_linear_21.hsaco new file mode 100644 index 00000000..d4b95254 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34/tem_fused_linear_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco new file mode 100644 index 00000000..fc1ee759 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_23.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..7587170e Binary 
files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..f2c3435f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..1ed9da9f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_22.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_22.hsaco new file mode 100644 index 00000000..b6689ad4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..368ac012 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..2b8eb511 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_add_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..b34e9003 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..a5419b81 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..c6bb3f2b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..e2fa21b1 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..31199cde Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet34batch16/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..268f5077 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..22d870c9 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_17.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_17.hsaco new file mode 100644 index 00000000..d322c3d4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_20.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_20.hsaco new file mode 100644 index 00000000..a797d2ad Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_3.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_3.hsaco new file mode 100644 index 00000000..02ec5d8a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_8.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_8.hsaco new file mode 100644 index 00000000..0d92df1a Binary 
files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..86dd2ec0 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..1d153449 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..172f8957 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..26b85611 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..823edf8b Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..df1d5c3f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_linear_24.hsaco b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_linear_24.hsaco new file mode 100644 index 00000000..cd0fb8fc Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch16/tem_fused_linear_24.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..60d4e542 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..b98941fb Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..ac7df00d Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..a5d325ab Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..dd189e5e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_add_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..4831ceef Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..dbc9924c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..30f53319 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..98b2ceae Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..ef7ee5f5 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_add_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_6.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_6.hsaco new file mode 100644 index 00000000..6f5b6f66 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco new file mode 100644 index 00000000..3bd91139 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..ddacebb6 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ 
diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..f9524307 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..df6ed032 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..db0db027 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..5632116b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..9cf5af67 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..4f2bbe87 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..4dbd4d5a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..b1a530c0 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_linear_21.hsaco b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_linear_21.hsaco new file mode 100644 index 00000000..e1f24acd Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch2/tem_fused_linear_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..b05db193 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_21.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..b00f3da5 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..184fd2d1 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..f916f0c3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_add_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..ea99cd3c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..aa307dc3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..2e1a0f91 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..633a13da Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..c2016bf8 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..14974a04 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..9a207435 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..5eb20e65 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_add_conv2d_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..c9911e83 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_12.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_12.hsaco new file mode 100644 index 00000000..abeadc44 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_6.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_6.hsaco new file mode 100644 index 00000000..f07948b4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..9beb9a27 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..9323b4b3 Binary files 
/dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..ad3213db Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..d42198df Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..4e5922e3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..02e9b031 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..33fcef5c Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_linear_22.hsaco b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_linear_22.hsaco new file mode 100644 index 00000000..34df40d1 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch4/tem_fused_linear_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco new file mode 100644 index 00000000..2339da17 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..e5ee04b3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..648e1ffe Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..92e26160 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_21.hsaco differ 
diff --git a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_5.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..ff789b7d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..753cb01d Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_add_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco new file mode 100644 index 00000000..778ff70f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_conv2d_miopen_batch_norm_relu_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..eab01a1a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..95361743 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_max_pool2d_miopen_batch_norm_relu_2.hsaco differ diff --git 
a/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_miopen_batch_norm_relu_1.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..314e9f2e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/poi_fused_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..1d503a84 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..5a7b415f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_add_conv2d_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_0.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_0.hsaco new file mode 100644 index 00000000..a1b45a4e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_17.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_17.hsaco new file mode 100644 index 00000000..a275ecf5 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_8.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_8.hsaco new file mode 100644 index 00000000..b25e2353 
Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..b6ccc6af Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..dda6d716 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..f5f3cd41 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..9880709a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..7a806045 Binary files /dev/null and 
b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..a3bde144 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..f1fb7d23 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..9fa79763 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_linear_23.hsaco b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_linear_23.hsaco new file mode 100644 index 00000000..48b48a45 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet34batch8/tem_fused_linear_23.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco b/machine_interface/tests/libs/hip/resnet50/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco new file mode 100644 index 00000000..3c3299ac Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/per_fused_adaptive_avg_pool2d_add_miopen_batch_norm_relu_29.hsaco differ 
diff --git a/machine_interface/tests/libs/hip/resnet50/poi_fused_add_miopen_batch_norm_relu_25.hsaco b/machine_interface/tests/libs/hip/resnet50/poi_fused_add_miopen_batch_norm_relu_25.hsaco new file mode 100644 index 00000000..fd8546e3 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/poi_fused_add_miopen_batch_norm_relu_25.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/poi_fused_add_miopen_batch_norm_relu_28.hsaco b/machine_interface/tests/libs/hip/resnet50/poi_fused_add_miopen_batch_norm_relu_28.hsaco new file mode 100644 index 00000000..522f385c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/poi_fused_add_miopen_batch_norm_relu_28.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco b/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco new file mode 100644 index 00000000..d7d420a6 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_17.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco b/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco new file mode 100644 index 00000000..d21fb54e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_22.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco b/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco new file mode 100644 index 00000000..c0fcadcc Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/poi_fused_conv2d_miopen_batch_norm_relu_26.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco 
b/machine_interface/tests/libs/hip/resnet50/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco new file mode 100644 index 00000000..28fe9062 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/poi_fused_max_pool2d_miopen_batch_norm_relu_1.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco new file mode 100644 index 00000000..5d39dcf2 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_11.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco new file mode 100644 index 00000000..31c1c0bf Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_14.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco new file mode 100644 index 00000000..470d9218 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_19.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco new file mode 100644 index 00000000..b188e052 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_21.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco 
b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco new file mode 100644 index 00000000..61e76d26 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_5.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco new file mode 100644 index 00000000..616da271 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_add_conv2d_miopen_batch_norm_relu_7.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_10.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_10.hsaco new file mode 100644 index 00000000..3255dc82 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_10.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_24.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_24.hsaco new file mode 100644 index 00000000..79a47521 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_24.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco new file mode 100644 index 00000000..add02474 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_0.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco new file mode 100644 index 00000000..ff0b37a4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_12.hsaco differ diff 
--git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco new file mode 100644 index 00000000..0f0288db Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_13.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco new file mode 100644 index 00000000..ab9a3895 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_15.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco new file mode 100644 index 00000000..3d7ba62e Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_16.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco new file mode 100644 index 00000000..f332187a Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_18.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco new file mode 100644 index 00000000..120581c4 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_2.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco 
b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco new file mode 100644 index 00000000..e105925f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_20.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco new file mode 100644 index 00000000..dd7d0d8c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_23.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco new file mode 100644 index 00000000..a634cc0c Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_27.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco new file mode 100644 index 00000000..1b906f33 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_3.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco new file mode 100644 index 00000000..c934ee81 Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_4.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco new file mode 100644 index 00000000..210a8bdd Binary files 
/dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_6.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco new file mode 100644 index 00000000..5b2d8a3b Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_8.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco new file mode 100644 index 00000000..d2d871af Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_conv2d_miopen_batch_norm_relu_9.hsaco differ diff --git a/machine_interface/tests/libs/hip/resnet50/tem_fused_linear_30.hsaco b/machine_interface/tests/libs/hip/resnet50/tem_fused_linear_30.hsaco new file mode 100644 index 00000000..9bbb069f Binary files /dev/null and b/machine_interface/tests/libs/hip/resnet50/tem_fused_linear_30.hsaco differ diff --git a/machine_interface/tests/libs/kernelCheck.cubin b/machine_interface/tests/libs/kernelCheck.cubin new file mode 100644 index 00000000..636ddcd6 Binary files /dev/null and b/machine_interface/tests/libs/kernelCheck.cubin differ diff --git a/machine_interface/tests/libs/kernelCheck.ptx b/machine_interface/tests/libs/kernelCheck.ptx new file mode 100644 index 00000000..a4ed98e2 --- /dev/null +++ b/machine_interface/tests/libs/kernelCheck.ptx @@ -0,0 +1,156 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-30672275 +// Cuda compilation tools, release 11.5, V11.5.119 +// Based on NVVM 7.0.1 +// + +.version 7.5 +.target sm_86 +.address_size 64 + + // .globl nothing + +.visible .entry nothing() +{ + + + + ret; + +} + // .globl self_add +.visible .entry self_add( + .param .u64 self_add_param_0 +) +{ + .reg .b32 
%r<8>; + .reg .b64 %rd<5>; + + + ld.param.u64 %rd1, [self_add_param_0]; + cvta.to.global.u64 %rd2, %rd1; + mov.u32 %r1, %ctaid.x; + mov.u32 %r2, %ntid.x; + mov.u32 %r3, %tid.x; + mad.lo.s32 %r4, %r1, %r2, %r3; + mul.wide.s32 %rd3, %r4, 4; + add.s64 %rd4, %rd2, %rd3; + ld.global.u32 %r5, [%rd4]; + shl.b32 %r6, %r5, 1; + add.s32 %r7, %r6, %r4; + st.global.u32 [%rd4], %r7; + ret; + +} + // .globl matmul +.visible .entry matmul( + .param .u64 matmul_param_0, + .param .u64 matmul_param_1, + .param .u64 matmul_param_2, + .param .u32 matmul_param_3 +) +{ + .reg .pred %p<9>; + .reg .b32 %r<60>; + .reg .b64 %rd<34>; + + + ld.param.u64 %rd18, [matmul_param_0]; + ld.param.u64 %rd19, [matmul_param_1]; + ld.param.u64 %rd17, [matmul_param_2]; + ld.param.u32 %r20, [matmul_param_3]; + cvta.to.global.u64 %rd1, %rd19; + cvta.to.global.u64 %rd2, %rd18; + mov.u32 %r21, %ntid.y; + mov.u32 %r22, %ctaid.y; + mov.u32 %r23, %tid.y; + mad.lo.s32 %r1, %r22, %r21, %r23; + mov.u32 %r24, %ntid.x; + mov.u32 %r25, %ctaid.x; + mov.u32 %r26, %tid.x; + mad.lo.s32 %r2, %r25, %r24, %r26; + setp.ge.s32 %p1, %r1, %r20; + setp.ge.s32 %p2, %r2, %r20; + or.pred %p3, %p1, %p2; + @%p3 bra $L__BB2_9; + + setp.lt.s32 %p4, %r20, 1; + mov.u32 %r59, 0; + mul.lo.s32 %r3, %r1, %r20; + @%p4 bra $L__BB2_8; + + add.s32 %r31, %r20, -1; + and.b32 %r58, %r20, 3; + setp.lt.u32 %p5, %r31, 3; + mov.u32 %r55, 0; + mov.u32 %r59, %r55; + @%p5 bra $L__BB2_5; + + sub.s32 %r53, %r20, %r58; + mul.wide.s32 %rd20, %r2, 4; + add.s64 %rd31, %rd1, %rd20; + add.s32 %r34, %r3, 2; + mul.wide.s32 %rd21, %r34, 4; + add.s64 %rd30, %rd2, %rd21; + mul.wide.s32 %rd5, %r20, 4; + +$L__BB2_4: + ld.global.u32 %r35, [%rd31]; + ld.global.u32 %r36, [%rd30+-8]; + mad.lo.s32 %r37, %r35, %r36, %r59; + add.s64 %rd22, %rd31, %rd5; + ld.global.u32 %r38, [%rd22]; + ld.global.u32 %r39, [%rd30+-4]; + mad.lo.s32 %r40, %r38, %r39, %r37; + add.s64 %rd23, %rd22, %rd5; + ld.global.u32 %r41, [%rd23]; + ld.global.u32 %r42, [%rd30]; + mad.lo.s32 %r43, %r41, %r42, 
%r40; + add.s64 %rd24, %rd23, %rd5; + add.s64 %rd31, %rd24, %rd5; + ld.global.u32 %r44, [%rd24]; + ld.global.u32 %r45, [%rd30+4]; + mad.lo.s32 %r59, %r44, %r45, %r43; + add.s32 %r55, %r55, 4; + add.s64 %rd30, %rd30, 16; + add.s32 %r53, %r53, -4; + setp.ne.s32 %p6, %r53, 0; + @%p6 bra $L__BB2_4; + +$L__BB2_5: + setp.eq.s32 %p7, %r58, 0; + @%p7 bra $L__BB2_8; + + mad.lo.s32 %r46, %r55, %r20, %r2; + mul.wide.s32 %rd25, %r46, 4; + add.s64 %rd33, %rd1, %rd25; + mul.wide.s32 %rd11, %r20, 4; + add.s32 %r47, %r55, %r3; + mul.wide.s32 %rd26, %r47, 4; + add.s64 %rd32, %rd2, %rd26; + +$L__BB2_7: + .pragma "nounroll"; + ld.global.u32 %r48, [%rd33]; + ld.global.u32 %r49, [%rd32]; + mad.lo.s32 %r59, %r48, %r49, %r59; + add.s64 %rd33, %rd33, %rd11; + add.s64 %rd32, %rd32, 4; + add.s32 %r58, %r58, -1; + setp.ne.s32 %p8, %r58, 0; + @%p8 bra $L__BB2_7; + +$L__BB2_8: + cvta.to.global.u64 %rd27, %rd17; + add.s32 %r50, %r3, %r2; + mul.wide.s32 %rd28, %r50, 4; + add.s64 %rd29, %rd27, %rd28; + st.global.u32 [%rd29], %r59; + +$L__BB2_9: + ret; + +} + diff --git a/machine_interface/tests/libs/mlops.hsaco b/machine_interface/tests/libs/mlops.hsaco new file mode 100644 index 00000000..0ff58e51 Binary files /dev/null and b/machine_interface/tests/libs/mlops.hsaco differ diff --git a/machine_interface/tests/libs/module.hsaco b/machine_interface/tests/libs/module.hsaco new file mode 100644 index 00000000..d03df018 Binary files /dev/null and b/machine_interface/tests/libs/module.hsaco differ diff --git a/server/Cargo.toml b/server/Cargo.toml index 65626570..735e64e6 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -1,43 +1,54 @@ -[package] -name = "dandelion_server" -version = "0.1.0" -edition = "2021" - -[dependencies] -machine_interface = { path = "../machine_interface", features = ["bytes_context"] } -dandelion_commons = { path = "../dandelion_commons" } -dispatcher = { path = "../dispatcher" } -futures = { version = "0.3.28", default-features=false} -tokio = { version = "1", 
features = ["full"] } -hyper = { version = "1.2", features = ["server", "http1", "http2"] } -hyper-util = { version = "0.1", features = ["tokio", "server-auto"] } -http-body-util = "0.1" -core_affinity = "0.8" -num_cpus = "1.16.0" -bytes = "1.6" -http = "0.2" -log = { version = "0.4.20", features = ["serde", "release_max_level_warn"] } -env_logger = "0.10.1" -serde = "1.0.197" -serde_json = "1.0.115" -bson = "2.9.0" -serde_bytes = "0.11" -signal-hook = "0.3.17" -signal-hook-tokio = {version = "0.3.1", features = [ "futures-v0_3"]} -# input parsing -clap = {version = "4.5", features = ["env","derive"]} - -[dev-dependencies] -assert_cmd = "2.0" -byteorder = "1.5" -serial_test = "3.1.1" -reqwest = { version = "0.12", features = ["blocking"] } - -[features] -archive = [] -cheri = ["machine_interface/cheri"] -mmu = ["machine_interface/mmu"] -kvm = ["machine_interface/kvm"] -wasm = ["machine_interface/wasm"] -reqwest_io = ["machine_interface/reqwest_io"] -timestamp = ["dispatcher/timestamp"] \ No newline at end of file +[package] +name = "dandelion_server" +version = "0.1.0" +edition = "2021" + +[dependencies] +machine_interface = { path = "../machine_interface", features = ["bytes_context"] } +dandelion_commons = { path = "../dandelion_commons" } +dispatcher = { path = "../dispatcher" } +futures = { version = "0.3.28", default-features=false} +tokio = { version = "1", features = ["full"] } +hyper = { version = "1.2", features = ["server", "http1", "http2"] } +hyper-util = { version = "0.1", features = ["tokio", "server-auto"] } +http-body-util = "0.1" +core_affinity = "0.8" +num_cpus = "1.16.0" +bytes = "1.6" +http = "0.2" +log = { version = "0.4.20", features = ["serde", "release_max_level_warn"] } +env_logger = "0.10.1" +serde = "1.0.197" +serde_json = "1.0.115" +bson = "2.9.0" +serde_bytes = "0.11" +signal-hook = "0.3.17" +signal-hook-tokio = {version = "0.3.1", features = [ "futures-v0_3"]} +# input parsing +clap = {version = "4.5", features = ["env","derive"]} 
+lazy_static = "1.4.0" +flexbuffers = "25.2.10" + +[dev-dependencies] +assert_cmd = "2.0" +byteorder = "1.5" +serial_test = "3.1.1" +reqwest = { version = "0.12", features = ["blocking"] } + +[features] +archive = [] +cheri = ["machine_interface/cheri"] +mmu = ["machine_interface/mmu"] +kvm = ["machine_interface/kvm"] +wasm = ["machine_interface/wasm"] +reqwest_io = ["machine_interface/reqwest_io"] +timestamp = ["dispatcher/timestamp"] +gpu = [] +hip = ["gpu"] +cuda = ["gpu"] +gpu_thread = ["machine_interface/gpu_thread", "gpu"] +gpu_process = ["machine_interface/gpu_process", "gpu"] +reuse_weights = ["machine_interface/reuse_weights", "dandelion_commons/reuse_weights"] +weights_from_disk = ["machine_interface/weights_from_disk"] +gpu_queue = ["dispatcher/gpu_queue"] +auto_batching = ["machine_interface/auto_batching", "dispatcher/auto_batching"] diff --git a/server/src/config.rs b/server/src/config.rs index 84f6b596..a3e4c5b6 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -6,6 +6,8 @@ const DEFAULT_CONFIG_PATH: &str = "./dandelion.config"; const DEFAULT_PORT: u16 = 8080; const DEFAULT_SINGLE_CORE: bool = false; const DEFAULT_TIMESTAMP_COUNT: usize = 1000; +const DEFAULT_GPU_COUNT: usize = 1; +const DEFAULT_GPU_WORKER_COUNT: usize = 2; #[derive(serde::Deserialize, Parser, Debug)] pub struct DandelionConfig { @@ -26,6 +28,10 @@ pub struct DandelionConfig { pub frontend_cores: Option, #[arg(long, env)] pub io_cores: Option, + #[arg(long, env, default_value_t = DEFAULT_GPU_COUNT)] + pub gpu_count: usize, + #[arg(long, env, default_value_t = DEFAULT_GPU_WORKER_COUNT)] + pub gpu_worker_count: usize, #[arg(long, env, default_value_t = DEFAULT_TIMESTAMP_COUNT)] #[serde(default)] pub timestamp_count: usize, @@ -61,6 +67,14 @@ impl DandelionConfig { if let Some(other_val) = other.io_cores { self.io_cores.get_or_insert(other_val); } + if self.gpu_count != DEFAULT_GPU_COUNT && other.gpu_count != default.gpu_count { + self.gpu_count = other.gpu_count; + } + if 
self.gpu_worker_count != DEFAULT_GPU_WORKER_COUNT + && other.gpu_worker_count != default.gpu_worker_count + { + self.gpu_worker_count = other.gpu_worker_count; + } // timestamp count if other.timestamp_count != DEFAULT_TIMESTAMP_COUNT && self.timestamp_count != default.timestamp_count diff --git a/server/src/lib.rs b/server/src/lib.rs index f288bfed..75df6276 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -213,6 +213,23 @@ fn encode_response( .copy_from_slice(×tamp_string_len.to_le_bytes()); } + // if reuse_weights is on, add in gpu cache hit information as string + #[cfg(feature = "reuse_weights")] + { + // timestamps formated as formatted string, consisting of a length, the string and a NULL byte + response.push(2); + response.extend_from_slice("gpu_cache_hit\0".as_bytes()); + let recorder_length_offset = response.len(); + response.extend_from_slice(&0i32.to_be_bytes()); + let recorder_string = format!("{}", _timings); + response.extend_from_slice(recorder_string.as_bytes()); + response.push(0); + // set length + 1 to account for terminating 0 + let recorder_string_len = (recorder_string.len() + 1) as i32; + response[recorder_length_offset..recorder_length_offset + 4] + .copy_from_slice(&recorder_string_len.to_le_bytes()); + } + // end docuemnt and set length response.push(0); let doc_length = (response.len() + all_items) as i32; diff --git a/server/src/main.rs b/server/src/main.rs index 9ce765a5..d78d7175 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -10,6 +10,7 @@ use dispatcher::{ function_registry::Metadata, resource_pool::ResourcePool, }; +use http::request; use http_body_util::BodyExt; use hyper::{ body::{Body, Incoming}, @@ -153,6 +154,44 @@ fn default_path() -> String { } /// Struct containing registration information for new function +#[derive(Debug, Deserialize)] +struct RegisterLibrary { + name: String, + library: Vec, +} + +async fn register_library(req: Request) -> Result, Infallible> { + let bytes = req + .collect() + .await 
+ .expect("Failed to extract bytes from library registration") + .to_bytes(); + + let request_map: RegisterLibrary = + bson::from_slice(&bytes).expect("Should be able to deserialise request"); + + // write library to file + let lib_path = FUNCTION_FOLDER_PATH.to_owned() + "/libs/"; + std::fs::create_dir_all(&lib_path).unwrap(); + let mut path_buff = PathBuf::from(&lib_path); + path_buff.push(request_map.name.clone()); + let mut function_file = std::fs::File::create(path_buff.clone()) + .expect("Failed to create file for registering library"); + function_file + .write_all(&request_map.library) + .expect("Failed to write file with content for registering"); + + return Ok::<_, Infallible>(Response::new(DandelionBody::from_vec( + "Library registered".as_bytes().to_vec(), + ))); +} + +#[derive(Debug, Deserialize)] +pub struct InputChunk { + #[serde(with = "serde_bytes")] + pub chunk: Vec, +} + #[derive(Debug, Deserialize)] struct RegisterFunction { /// String name of the function @@ -167,7 +206,7 @@ struct RegisterFunction { /// Binary representation of the function, ignored if a local path is given binary: Vec, /// Metadata for the sets and optionally static items to pass into the function for that set - input_sets: Vec<(String, Option)>>)>, + input_sets: Vec<(String, Option)>>)>, /// output set names output_sets: Vec, } @@ -181,9 +220,16 @@ async fn register_function( .await .expect("Failed to extract body from function registration") .to_bytes(); + info!("Size received: {:?}", bytes.len()); // find first line end character - let request_map: RegisterFunction = - bson::from_slice(&bytes).expect("Should be able to deserialize request"); + /*let request_map: RegisterFunction = + bson::from_slice(&bytes).expect("Should be able to deserialize request");*/ + + use flexbuffers; + let slice: &[u8] = &bytes; + let deserializer = flexbuffers::Reader::get_root(slice).unwrap(); + let request_map: RegisterFunction = RegisterFunction::deserialize(deserializer).unwrap(); + // if 
local is present ignore the binary let path_string = if !request_map.local_path.is_empty() { // check that file exists @@ -219,8 +265,18 @@ async fn register_function( "Kvm" => EngineType::Kvm, #[cfg(feature = "cheri")] "Cheri" => EngineType::Cheri, + #[cfg(feature = "gpu_thread")] + "GpuThread" => EngineType::GpuThread, + #[cfg(feature = "gpu_process")] + "GpuProcess" => EngineType::GpuProcess, unkown => panic!("Unkown engine type string {}", unkown), }; + + #[cfg(feature = "weights_from_disk")] + let inputs_folder = format!("{}/{}_weights", FUNCTION_FOLDER_PATH, &request_map.name); + #[cfg(feature = "weights_from_disk")] + std::fs::create_dir_all(&inputs_folder).unwrap(); + let input_sets = request_map .input_sets .into_iter() @@ -228,10 +284,29 @@ async fn register_function( if let Some(static_data) = data { let data_contexts = static_data .into_iter() - .map(|(item_name, data_vec)| { + .map(|(item_name, data_chunks)| { + let mut data_vec: Vec = Vec::new(); + for mut chunk in data_chunks { + data_vec.append(&mut chunk.chunk); + } let item_size = data_vec.len(); - let mut new_context = - ReadOnlyContext::new(data_vec.into_boxed_slice()).unwrap(); + + #[cfg(not(feature = "weights_from_disk"))] + let mut new_context = ReadOnlyContext::new(data_vec.into_boxed_slice()).unwrap(); + + #[cfg(feature = "weights_from_disk")] + let mut new_context = { + // write weight to file + let mut path_buff = format!("{}/{}", inputs_folder.clone(), name.clone()); + let mut function_file = std::fs::File::create(&path_buff) + .expect("Failed to create file for registering function weight"); + function_file + .write_all(&data_vec) + .expect("Failed to write file with content for registering function weight"); + + ReadOnlyContext::new_disk(data_vec.into_boxed_slice(), &path_buff).unwrap() + }; + new_context.content.push(Some(DataSet { ident: name.clone(), buffers: vec![DataItem { @@ -330,22 +405,39 @@ async fn service( // TODO rename to cold func and hot func, remove matmul, compute, io 
"/register/function" => register_function(req, dispatcher).await, "/register/composition" => register_composition(req, dispatcher).await, + "/register/library" => register_library(req).await, "/cold/matmul" | "/cold/matmulstore" | "/cold/compute" | "/cold/io" + | "/cold/inference" + | "/cold/inference-batched" | "/cold/chain_scaling" | "/cold/middleware_app" | "/cold/compression_app" | "/cold/python_app" => serve_request(true, req, dispatcher).await, + "/cold/double_matmul" + | "/cold/resnet18" + | "/cold/test" + | "/cold/model_inference"=> serve_request(true, req, dispatcher).await, "/hot/matmul" | "/hot/matmulstore" | "/hot/compute" | "/hot/io" + | "/hot/inference" + | "/hot/inference-batched" | "/hot/chain_scaling" | "/hot/middleware_app" | "/hot/compression_app" | "/hot/python_app" => serve_request(false, req, dispatcher).await, + "/hot/double_matmul" + | "/hot/resnet18" + | "/hot/test" + | "/hot/model_inference" => serve_request(false, req, dispatcher).await, + "/stats" => serve_stats(req).await, + _ => Ok::<_, Infallible>(Response::new(DandelionBody::from_vec( + format!("Hello, Wor\n").into_bytes(), + ))), "/stats" => serve_stats(req).await, other_uri => { trace!("Received request on {}", other_uri); @@ -490,16 +582,16 @@ fn main() -> () { ); } - let resource_conversion = |core_index| ComputeResource::CPU(core_index); + let resource_conversion = |core_index| ComputeResource::CPU(core_index as u8); let dispatcher_cores = config.get_dispatcher_cores(); let frontend_cores = config.get_frontend_cores(); - let communication_cores = config + let communication_cores: Vec = config .get_communication_cores() .into_iter() .map(|core| resource_conversion(core)) .collect(); - let compute_cores = config + let compute_cores: Vec = config .get_computation_cores() .into_iter() .map(|core| resource_conversion(core)) @@ -565,8 +657,31 @@ fn main() -> () { let engine_type = EngineType::Kvm; #[cfg(feature = "cheri")] let engine_type = EngineType::Cheri; + #[cfg(feature = 
"gpu_thread")] + let engine_type = EngineType::GpuThread; + #[cfg(feature = "gpu_process")] + let engine_type = EngineType::GpuProcess; #[cfg(any(feature = "cheri", feature = "wasm", feature = "mmu", feature = "kvm"))] pool_map.insert(engine_type, compute_cores); + #[cfg(any(feature = "gpu_thread", feature = "gpu_process"))] + { + let gpu_count: u8 = config.gpu_count as u8; + pool_map.insert( + engine_type, + config + .get_computation_cores() + .iter() + // The gpu_process engine relies on having such a contiguous region of CPU cores available -- one core + // goes to the Dandelion process thread of each worker and then another to the actual worker process. + // Once the system moves to giving Vecs of ComputeResources this needs to be changed + .step_by(config.gpu_worker_count * 2) + .zip(0..gpu_count) + .map(|(cpu_id, gpu_id)| { + ComputeResource::GPU(*cpu_id, gpu_id, config.gpu_worker_count as u8) + }) + .collect(), + ); + } #[cfg(feature = "reqwest_io")] pool_map.insert(EngineType::Reqwest, communication_cores); let resource_pool = ResourcePool { @@ -606,6 +721,14 @@ fn main() -> () { size: max_ram, }, ), + #[cfg(feature = "gpu")] + ( + DomainType::Gpu, + MemoryResource::Shared { + id: 0, + size: max_ram, + }, + ), #[cfg(feature = "wasm")] ( DomainType::RWasm, @@ -632,10 +755,20 @@ fn main() -> () { print!(" kvm"); #[cfg(feature = "wasm")] print!(" wasm"); + #[cfg(feature = "gpu_thread")] + print!(" gpu_thread"); + #[cfg(feature = "gpu_process")] + print!(" gpu_process"); #[cfg(feature = "reqwest_io")] print!(" request_io"); #[cfg(feature = "timestamp")] print!(" timestamp"); + #[cfg(feature = "reuse_weights")] + print!(" reuse_weights"); + #[cfg(feature = "weights_from_disk")] + print!(" weights_from_disk"); + #[cfg(feature = "auto_batching")] + print!(" auto_batching"); print!("\n"); // Run this server for... forever... unless I receive a signal! 
diff --git a/server/tests/server_tests.rs b/server/tests/server_tests.rs index 1120e112..e688531c 100644 --- a/server/tests/server_tests.rs +++ b/server/tests/server_tests.rs @@ -1,5 +1,12 @@ #[cfg(all( - any(feature = "wasm", feature = "mmu", feature = "kvm", feature = "cheri"), + any( + feature = "wasm", + feature = "mmu", + feature = "cheri", + feature = "kvm", + feature = "gpu_thread", + feature = "gpu_process" + ), feature = "reqwest_io" ))] mod server_tests { @@ -13,12 +20,24 @@ mod server_tests { use std::{ io::{BufRead, BufReader, Cursor, Read}, process::{Child, Command, Stdio}, + sync::Mutex, }; + // Prevent tests running in parallel to avoid address already in use errors + lazy_static::lazy_static! { + static ref TEST_LOCK: Mutex<()> = Mutex::new(()); + } + struct ServerKiller { server: Child, } + #[derive(Serialize)] + struct RegisterLibrary { + name: String, + library: Vec, + } + #[derive(Serialize)] struct RegisterFunction { name: String, @@ -89,16 +108,30 @@ mod server_tests { let mut data = Vec::new(); data.extend_from_slice(&i64::to_le_bytes(1)); data.extend_from_slice(&i64::to_le_bytes(1)); - let mat_request = DandelionRequest { - name: function_name, - sets: vec![InputSet { + #[cfg(feature = "gpu")] + let cfg = Vec::from(i64::to_le_bytes(1i64)); // GPU specific config input for eg. 
grid size + + let mut sets = vec![InputSet { + identifier: String::from("A"), + items: vec![InputItem { identifier: String::from(""), - items: vec![InputItem { - identifier: String::from(""), - key: 0, - data: &data, - }], + key: 0, + data: &data, }], + }]; + + #[cfg(feature = "gpu")] + sets.push(InputSet { + identifier: String::from("cfg"), + items: vec![InputItem { + identifier: String::from(""), + key: 0, + data: &cfg, + }], + }); + let mat_request = DandelionRequest { + name: function_name, + sets, }; let resp = client @@ -146,11 +179,52 @@ mod server_tests { version = "elf_cheri"; engine_type = String::from("Cheri"); } - let matmul_path = format!( - "{}/../machine_interface/tests/data/test_{}_matmul", - env!("CARGO_MANIFEST_DIR"), - version, - ); + let matmul_path; + #[cfg(any(feature = "wasm", feature = "mmu", feature = "cheri"))] + { + matmul_path = format!( + "{}/../machine_interface/tests/data/test_{}_matmul", + env!("CARGO_MANIFEST_DIR"), + version, + ); + } + // TODO: unify with other engines + #[cfg(feature = "gpu")] + { + matmul_path = format!( + "{}/../machine_interface/tests/data/hip/test_gpu_matmul_para.json", + env!("CARGO_MANIFEST_DIR"), + ); + #[cfg(feature = "gpu_thread")] + { + engine_type = String::from("GpuThread"); + } + #[cfg(feature = "gpu_process")] + { + engine_type = String::from("GpuProcess"); + } + } + + // Register GPU kernel library + #[cfg(feature = "gpu")] + { + let register_library = RegisterLibrary { + name: String::from("mlops.hsaco"), + library: std::fs::read(format!( + "{}/../machine_interface/tests/libs/mlops.hsaco", + env!("CARGO_MANIFEST_DIR") + )) + .unwrap(), + }; + + let library_client = reqwest::blocking::Client::new(); + let library_resp = library_client + .post("http://localhost:8080/register/library") + .body(bson::to_vec(&register_library).unwrap()) + .send() + .unwrap(); + assert!(library_resp.status().is_success()); + } let version_string = match http_version { reqwest::Version::HTTP_09 => "0_9", @@ -194,6 +268,7 @@
mod server_tests { let chain_name = format!("chain_{}", version_string); let chain_request = RegisterChain { + #[cfg(not(feature = "gpu"))] composition: format!( r#" function {function} (InMats) => (OutMats); @@ -205,6 +280,18 @@ mod server_tests { function = function_name, chain = chain_name, ), + #[cfg(feature = "gpu")] + composition: format!( + r#" + (:function {function} (InMats Config) -> (OutMats)) + (:composition {chain} (CompInMats CompConfig) -> (CompOutMats) ( + ({function} ((:all InMats <- CompInMats) (:all Config <- CompConfig)) => ((InterMat := OutMats))) + ({function} ((:all InMats <- InterMat) (:all Config <- CompConfig)) => ((CompOutMats := OutMats))) + )) + "#, + function = function_name, + chain = chain_name, + ), }; let chain_resp = client @@ -236,6 +323,7 @@ mod server_tests { let server = cmd .stdout(Stdio::piped()) .stderr(Stdio::piped()) + .env("DANDELION_LIBRARY_PATH", "/tmp/dandelion_server/libs/") .spawn() .unwrap(); let mut server_killer = ServerKiller { server }; @@ -306,6 +394,7 @@ mod server_tests { let server = cmd .stdout(Stdio::piped()) .stderr(Stdio::piped()) + .env("DANDELION_LIBRARY_PATH", "/tmp/dandelion_server/libs/") .spawn() .unwrap(); let mut server_killer = ServerKiller { server }; @@ -330,4 +419,193 @@ mod server_tests { let status = status_result.unwrap(); assert_eq!(status, None, "Server exited unexpectedly"); } + + fn send_inference_request(endpoint: &str, function_name: String) { + // call into function + let mut matrix_data = Vec::new(); + matrix_data.extend_from_slice(&f32::to_le_bytes(224f32)); + matrix_data.extend_from_slice(&f32::to_le_bytes(224f32)); + for i in 0..(224 * 224) { + matrix_data.extend_from_slice(&f32::to_le_bytes(i as f32)); + } + + let mut kernel_data = Vec::new(); + kernel_data.extend_from_slice(&f32::to_le_bytes(5f32)); + kernel_data.extend_from_slice(&f32::to_le_bytes(5f32)); + for i in 0..(5 * 5) { + kernel_data.extend_from_slice(&f32::to_le_bytes(i as f32)); + } + + let mut cfg = Vec::new(); + 
cfg.extend_from_slice(&i64::to_le_bytes((112 * 112 + 2) * 4)); + cfg.extend_from_slice(&i64::to_le_bytes((224 + 31) / 32)); + cfg.extend_from_slice(&i64::to_le_bytes((112 + 31) / 32)); + cfg.extend_from_slice(&i64::to_le_bytes(500)); + + let sets = vec![ + InputSet { + identifier: String::from("A"), + items: vec![InputItem { + identifier: String::from(""), + key: 0, + data: &matrix_data, + }], + }, + InputSet { + identifier: String::from("B"), + items: vec![InputItem { + identifier: String::from(""), + key: 0, + data: &kernel_data, + }], + }, + InputSet { + identifier: String::from("cfg"), + items: vec![InputItem { + identifier: String::from(""), + key: 0, + data: &cfg, + }], + }, + ]; + + let mat_request = DandelionRequest { + name: function_name, + sets, + }; + + let client = reqwest::blocking::Client::new(); + let resp = client + .post(endpoint) + .body(bson::to_vec(&mat_request).unwrap()) + .send() + .unwrap(); + assert!(resp.status().is_success()); + + let body = resp.bytes().unwrap(); + let response: DandelionDeserializeResponse = bson::from_slice(&body).unwrap(); + assert_eq!(1, response.sets.len()); + assert_eq!(1, response.sets[0].items.len()); + let response_data = response.sets[0].items[0].data; + assert_eq!(response_data.len(), (112 * 112 + 2) * 4); + } + + #[cfg(any(feature = "gpu", feature = "mmu"))] + #[test] + #[serial] + fn serve_inference() { + let mut cmd = Command::cargo_bin("dandelion_server").unwrap(); + let mut server = cmd + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .env("DANDELION_LIBRARY_PATH", "/tmp/dandelion_server/libs/") + .spawn() + .unwrap(); + let mut reader = BufReader::new(server.stdout.take().unwrap()); + loop { + let mut buf = String::new(); + let len = reader.read_line(&mut buf).unwrap(); + assert_ne!(len, 0, "Server exited unexpectedly"); + if buf.contains("Server start") { + break; + } + } + let _ = server.stdout.insert(reader.into_inner()); + let mut server_killer = ServerKiller { server }; + + // register function 
+ let engine_type; + #[cfg(feature = "gpu")] + let inference_path = format!( + "{}/../machine_interface/tests/data/hip/test_gpu_inference.json", + env!("CARGO_MANIFEST_DIR"), + ); + #[cfg(feature = "mmu")] + let inference_path = format!( + "{}/../machine_interface/tests/data/test_elf_mmu_x86_64_inference", + env!("CARGO_MANIFEST_DIR"), + ); + #[cfg(feature = "gpu_thread")] + { + engine_type = String::from("GpuThread"); + } + #[cfg(feature = "gpu_process")] + { + engine_type = String::from("GpuProcess"); + } + #[cfg(feature = "mmu")] + { + engine_type = String::from("Process"); + } + + // Register GPU kernel library + #[cfg(feature = "gpu")] + { + let register_library = RegisterLibrary { + name: String::from("mlops.hsaco"), + library: std::fs::read(format!( + "{}/../machine_interface/tests/libs/mlops.hsaco", + env!("CARGO_MANIFEST_DIR") + )) + .unwrap(), + }; + + let library_client = reqwest::blocking::Client::new(); + let library_resp = library_client + .post("http://localhost:8080/register/library") + .body(bson::to_vec(&register_library).unwrap()) + .send() + .unwrap(); + assert!(library_resp.status().is_success()); + } + + let register_request = RegisterFunction { + name: String::from("inference"), + context_size: 0x802_0000, + binary: std::fs::read(inference_path).unwrap(), + engine_type, + input_sets: vec![ + (String::from("A"), None), + (String::from("B"), None), + (String::from("cfg"), None), + ], + output_sets: vec![String::from("D")], + }; + let registration_client = reqwest::blocking::Client::new(); + let registration_resp = registration_client + .post("http://localhost:8080/register/function") + .body(bson::to_vec(&register_request).unwrap()) + .send() + .unwrap(); + assert!(registration_resp.status().is_success()); + + let chain_request = RegisterChain { + composition: String::from( + r#" + (:function inference (A B cfg) -> (out)) + (:composition chain (img kern func_cfg) -> (img_out) ( + (inference ((:all A <- img) (:all B <- kern) (:all cfg <- func_cfg)) =>
((img_out := out))) + )) + "#, + ), + }; + let chain_client = reqwest::blocking::Client::new(); + let chain_resp = chain_client + .post("http://localhost:8080/register/composition") + .body(bson::to_vec(&chain_request).unwrap()) + .send() + .unwrap(); + assert!(chain_resp.status().is_success()); + + send_inference_request( + "http://localhost:8080/hot/inference", + String::from("inference"), + ); + send_inference_request("http://localhost:8080/hot/inference", String::from("chain")); + + let status_result = server_killer.server.try_wait(); + drop(server_killer); + let status = status_result.unwrap(); + assert_eq!(status, None, "Server exited unexpectedly"); + } }