From 297ff70d7c5511eac30c06dba242c674f0c0a6cd Mon Sep 17 00:00:00 2001 From: shirohasuki Date: Fri, 16 Jan 2026 19:10:39 +0800 Subject: [PATCH 1/3] feat: update gem5 to support socket --- bebop/src/arch/gemmini/gemmini.rs | 55 +++++- host/gem5/bebop.patch | 303 +++++++++++++++++++----------- host/gem5/install-gem5.sh | 2 +- host/gem5/riscv-se.py | 8 +- 4 files changed, 255 insertions(+), 113 deletions(-) diff --git a/bebop/src/arch/gemmini/gemmini.rs b/bebop/src/arch/gemmini/gemmini.rs index fb07c76..9ff7673 100644 --- a/bebop/src/arch/gemmini/gemmini.rs +++ b/bebop/src/arch/gemmini/gemmini.rs @@ -556,6 +556,42 @@ impl Gemmini { } } + // Batch read DIM bytes from DRAM (optimized for DIM-sized chunks) + fn read_batch_dim(&self, addr: RegT) -> [u8; DIM] { + let mut result = [0u8; DIM]; + + if let Some(ref dma_read) = self.state.dma_read { + let mut handler = dma_read.lock().unwrap(); + + match handler.read(addr, DIM as u32) { + Ok(data) => { + for i in 0..DIM { + result[i] = ((data >> (i * 8)) & 0xFF) as u8; + } + }, + Err(_) => { + // Return zeros on error + } + } + } + + result + } + + // Batch write DIM bytes to DRAM (optimized for DIM-sized chunks) + fn write_batch_dim(&mut self, addr: RegT, data: &[u8; DIM]) { + if let Some(ref dma_write) = self.state.dma_write { + let mut handler = dma_write.lock().unwrap(); + + let mut data_u128: u128 = 0; + for i in 0..DIM { + data_u128 |= (data[i] as u128) << (i * 8); + } + + let _ = handler.write(addr, data_u128, DIM as u32); + } + } + fn read_matrix_from_dram( &self, addr: RegT, @@ -573,13 +609,26 @@ impl Gemmini { panic!("ERROR: non-zeroable matrix given address zero!"); } + // Batch read optimization: read DIM bytes at a time for i in 0..rows as usize { let ii = if repeating_bias { 0 } else { i }; let dram_row_addr = addr + (ii * cols as usize * std::mem::size_of::()) as u64; - for j in 0..cols as usize { - let dram_byte_addr = dram_row_addr + (j * std::mem::size_of::()) as u64; - result[i][j] = 
self.read_from_dram::(dram_byte_addr); + // Read in DIM-byte chunks + for j in (0..cols as usize).step_by(DIM) { + let remaining = cols as usize - j; + if remaining >= DIM { + // Read full DIM bytes + let bytes = self.read_batch_dim(dram_row_addr + j as u64); + for k in 0..DIM { + result[i][j + k] = bytes[k] as ElemT; + } + } else { + // Handle remaining bytes individually (fallback for tail) + for k in 0..remaining { + result[i][j + k] = self.read_from_dram::(dram_row_addr + (j + k) as u64); + } + } } } diff --git a/host/gem5/bebop.patch b/host/gem5/bebop.patch index f3a997b..e3f39b9 100644 --- a/host/gem5/bebop.patch +++ b/host/gem5/bebop.patch @@ -18,31 +18,6 @@ index a5802ad371..b3008bd565 100755 Export('main') from gem5_scons.util import get_termcap -diff --git a/src/arch/riscv/faults.cc b/src/arch/riscv/faults.cc -index dc312b5f67..4e58948316 100644 ---- a/src/arch/riscv/faults.cc -+++ b/src/arch/riscv/faults.cc -@@ -31,6 +31,7 @@ - - #include "arch/riscv/faults.hh" - -+#include "arch/riscv/insts/custom.hh" - #include "arch/riscv/insts/static_inst.hh" - #include "arch/riscv/isa.hh" - #include "arch/riscv/mmu.hh" -@@ -286,6 +287,12 @@ void - UnknownInstFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst) - { - auto *rsi = static_cast(inst.get()); -+ const uint8_t opcode = rsi->machInst.opcode; -+ // Handle custom-3 (opcode 0x7b) which conflicts with M5Op -+ if (opcode == 0x7b) { -+ handleRiscvCustomInstruction(tc, rsi->machInst, inst.get()); -+ return; -+ } - panic("Unknown instruction 0x%08x at pc %s", rsi->machInst, - tc->pcState()); - } diff --git a/src/arch/riscv/insts/SConscript b/src/arch/riscv/insts/SConscript index 9694cc1405..8449eb7c80 100644 --- a/src/arch/riscv/insts/SConscript @@ -57,19 +32,25 @@ index 9694cc1405..8449eb7c80 100644 Source('static_inst.cc', tags=['riscv isa']) diff --git a/src/arch/riscv/insts/custom.cc b/src/arch/riscv/insts/custom.cc new file mode 100644 -index 0000000000..0f8bfd16d4 +index 0000000000..28b8ee8cd5 --- 
/dev/null +++ b/src/arch/riscv/insts/custom.cc -@@ -0,0 +1,148 @@ +@@ -0,0 +1,242 @@ +#include "arch/riscv/insts/custom.hh" + ++#include ++#include ++#include ++#include ++#include "base/types.hh" +#include "ipc/socket.h" +#include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/pcstate.hh" +#include "arch/riscv/regs/int.hh" -+#include "debug/Faults.hh" -+#include "mem/se_translating_port_proxy.hh" -+#include "sim/debug.hh" ++#include "mem/page_table.hh" ++#include "mem/physical.hh" ++#include "sim/process.hh" ++#include "sim/system.hh" + +namespace gem5 +{ @@ -79,6 +60,9 @@ index 0000000000..0f8bfd16d4 +namespace +{ + ++// Global mutex for DMA memory access synchronization ++static std::mutex dma_mutex; ++ +struct RoCCInstFields { + unsigned opcode : 7; + unsigned rd : 5; @@ -109,6 +93,12 @@ index 0000000000..0f8bfd16d4 + RoCCInst rocc{}; + rocc.bits = instBits.instBits; + ++ // printf("[GEM5-BEBOP] Custom instruction detected!\n"); ++ // printf("[GEM5-BEBOP] opcode=0x%x, funct=%d, rd=%d, rs1=%d, rs2=%d\n", ++ // rocc.r.opcode, rocc.r.funct, rocc.r.rd, rocc.r.rs1, rocc.r.rs2); ++ // printf("[GEM5-BEBOP] xd=%d, xs1=%d, xs2=%d\n", rocc.r.xd, rocc.r.xs1, rocc.r.xs2); ++ // fflush(stdout); ++ + RegVal xs1 = rocc.r.xs1 ? + tc->getReg(intRegClass[rocc.r.rs1]) : + static_cast(-1); @@ -116,95 +106,174 @@ index 0000000000..0f8bfd16d4 + tc->getReg(intRegClass[rocc.r.rs2]) : + static_cast(-1); + -+ // DMA read callback: reads from guest memory -+ auto read_cb = [tc](uint64_t addr, uint32_t size) -> dma_data_128_t { -+ SETranslatingPortProxy proxy(tc); ++ // printf("[GEM5-BEBOP] Reading x%d (rs1) -> xs1=0x%lx\n", rocc.r.rs1, xs1); ++ // printf("[GEM5-BEBOP] Reading x%d (rs2) -> xs2=0x%lx\n", rocc.r.rs2, xs2); ++ // fflush(stdout); ++ ++ // Get page table and system for DMA callbacks ++ auto *process = tc->getProcessPtr(); ++ auto *pTable = process->pTable; ++ auto *system = tc->getSystemPtr(); ++ ++ // Get backing store for direct memory access (thread-safe!) 
++ auto backing_store = system->getPhysMem().getBackingStore(); ++ ++ // printf("[GEM5-BEBOP] Captured pTable=%p, system=%p, backing_store entries=%zu for DMA callbacks\n", ++ // pTable, system, backing_store.size()); ++ // fflush(stdout); ++ ++ // DMA read callback: uses page table translation + direct memory access ++ // This runs in a separate thread, so we use raw memory pointers (thread-safe!) ++ auto read_cb = [pTable, backing_store](uint64_t addr, uint32_t size) -> dma_data_128_t { ++ // printf("[GEM5-LOG] DMA read request (in DMA thread): addr=0x%lx, size=%u\n", addr, size); ++ // fflush(stdout); ++ + dma_data_128_t value = {0, 0}; + -+ switch (size) { -+ case 1: { -+ uint8_t data = 0; -+ proxy.readBlob(addr, reinterpret_cast(&data), size); -+ value.lo = data; -+ break; -+ } -+ case 2: { -+ uint16_t data = 0; -+ proxy.readBlob(addr, reinterpret_cast(&data), size); -+ value.lo = data; -+ break; -+ } -+ case 4: { -+ uint32_t data = 0; -+ proxy.readBlob(addr, reinterpret_cast(&data), size); -+ value.lo = data; -+ break; -+ } -+ case 8: { -+ uint64_t data = 0; -+ proxy.readBlob(addr, reinterpret_cast(&data), size); -+ value.lo = data; -+ break; -+ } -+ case 16: { -+ proxy.readBlob(addr, reinterpret_cast(&value.lo), 8); -+ proxy.readBlob(addr + 8, reinterpret_cast(&value.hi), 8); -+ break; -+ } -+ default: -+ fprintf(stderr, "bebop: Invalid DMA read size %u\n", size); -+ abort(); ++ // Use page table to translate addresses (page table is read-only, relatively safe) ++ // Then use direct memory access via raw pointers (completely thread-safe!) 
++ std::lock_guard lock(dma_mutex); ++ ++ // Read byte by byte to handle page boundaries ++ uint8_t *result_ptr = reinterpret_cast(&value.lo); ++ for (uint32_t i = 0; i < size; i++) { ++ Addr vaddr = addr + i; ++ Addr paddr = 0; ++ ++ if (!pTable->translate(vaddr, paddr)) { ++ // fprintf(stderr, "[GEM5-BEBOP] Failed to translate vaddr=0x%lx\n", vaddr); ++ // fflush(stderr); ++ return value; ++ } ++ ++ // Find backing store entry containing this physical address ++ bool found = false; ++ for (const auto& entry : backing_store) { ++ if (entry.range.contains(paddr)) { ++ // Calculate offset within this entry ++ Addr offset = paddr - entry.range.start(); ++ // Direct memory access (thread-safe!) ++ uint8_t byte_val = entry.pmem[offset]; ++ ++ if (i < 8) { ++ result_ptr[i] = byte_val; ++ } else { ++ uint8_t *hi_ptr = reinterpret_cast(&value.hi); ++ hi_ptr[i - 8] = byte_val; ++ } ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) { ++ fprintf(stderr, "[GEM5-BEBOP] Physical address 0x%lx not found in backing store\n", paddr); ++ fflush(stderr); ++ } + } + ++ // printf("[GEM5-BEBOP] DMA read complete: addr=0x%lx, value=0x%016lx%016lx\n", ++ // addr, value.hi, value.lo); ++ // fflush(stdout); + return value; + }; + -+ // DMA write callback: writes to guest memory -+ auto write_cb = [tc](uint64_t addr, dma_data_128_t data, uint32_t size) { -+ SETranslatingPortProxy proxy(tc); ++ // DMA write callback: uses page table translation + direct memory access ++ // This runs in a separate thread, so we use raw memory pointers (thread-safe!) 
++ auto write_cb = [pTable, backing_store](uint64_t addr, dma_data_128_t data, uint32_t size) { ++ // printf("[GEM5-BEBOP] DMA write request (in DMA thread): addr=0x%lx, size=%u, data=0x%016lx%016lx\n", ++ // addr, size, data.hi, data.lo); ++ // fflush(stdout); + -+ switch (size) { -+ case 1: { -+ uint8_t byte_data = static_cast(data.lo); -+ proxy.writeBlob(addr, reinterpret_cast(&byte_data), size); -+ break; -+ } -+ case 2: { -+ uint16_t half_data = static_cast(data.lo); -+ proxy.writeBlob(addr, reinterpret_cast(&half_data), size); -+ break; -+ } -+ case 4: { -+ uint32_t word_data = static_cast(data.lo); -+ proxy.writeBlob(addr, reinterpret_cast(&word_data), size); -+ break; -+ } -+ case 8: { -+ proxy.writeBlob(addr, reinterpret_cast(&data.lo), size); -+ break; -+ } -+ case 16: { -+ proxy.writeBlob(addr, reinterpret_cast(&data.lo), 8); -+ proxy.writeBlob(addr + 8, reinterpret_cast(&data.hi), 8); -+ break; -+ } -+ default: -+ fprintf(stderr, "bebop: Invalid DMA write size %u\n", size); -+ abort(); ++ std::lock_guard lock(dma_mutex); ++ ++ // Write byte by byte to handle page boundaries ++ const uint8_t *data_ptr = reinterpret_cast(&data.lo); ++ for (uint32_t i = 0; i < size; i++) { ++ Addr vaddr = addr + i; ++ Addr paddr = 0; ++ ++ if (!pTable->translate(vaddr, paddr)) { ++ // fprintf(stderr, "[GEM5-BEBOP] Failed to translate vaddr=0x%lx\n", vaddr); ++ // fflush(stderr); ++ return; ++ } ++ ++ // Find backing store entry containing this physical address ++ bool found = false; ++ for (const auto& entry : backing_store) { ++ if (entry.range.contains(paddr)) { ++ // Calculate offset within this entry ++ Addr offset = paddr - entry.range.start(); ++ ++ // Get the byte to write ++ uint8_t byte_val; ++ if (i < 8) { ++ byte_val = data_ptr[i]; ++ } else { ++ const uint8_t *hi_ptr = reinterpret_cast(&data.hi); ++ byte_val = hi_ptr[i - 8]; ++ } ++ ++ // Direct memory access (thread-safe!) 
++ entry.pmem[offset] = byte_val; ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) { ++ fprintf(stderr, "[GEM5-BEBOP] Physical address 0x%lx not found in backing store\n", paddr); ++ fflush(stderr); ++ } + } ++ ++ // printf("[GEM5-BEBOP] DMA write complete: addr=0x%lx\n", addr); ++ // fflush(stdout); + }; + + auto &client = getSocketClient(); ++ ++ // Initialize socket connection if not already connected ++ if (!client.is_connected()) { ++ // printf("[GEM5-LOG] Initializing socket connection...\n"); ++ // fflush(stdout); ++ if (!client.init()) { ++ fprintf(stderr, "[GEM5-BEBOP] ERROR: Failed to initialize socket connection!\n"); ++ fflush(stderr); ++ // Return 0 as default result on connection failure ++ if (rocc.r.xd) ++ tc->setReg(intRegClass[rocc.r.rd], static_cast(0)); ++ auto pc_state = tc->pcState().as(); ++ inst->advancePC(pc_state); ++ tc->pcState(pc_state); ++ return; ++ } ++ // printf("[GEM5-BEBOP] Socket connection established!\n"); ++ // fflush(stdout); ++ } ++ + client.set_dma_callbacks(read_cb, write_cb); ++ ++ // printf("[GEM5-LOG] Sending command to bebop: funct=%d, xs1=0x%lx, xs2=0x%lx\n", ++ // rocc.r.funct, xs1, xs2); ++ // fflush(stderr); ++ + uint64_t result = client.send_and_wait(rocc.r.funct, xs1, xs2); ++ ++ // printf("[GEM5-BEBOP] Received result from bebop: 0x%lx\n", result); ++ // fflush(stdout); ++ + client.set_dma_callbacks(dma_read_cb_t(), dma_write_cb_t()); + + if (rocc.r.xd) + tc->setReg(intRegClass[rocc.r.rd], result); + -+ auto pc_state = tc->pcState().as(); -+ inst->advancePC(pc_state); -+ tc->pcState(pc_state); ++ // Don't manually update PC - let gem5's tick() function handle it ++ // The problem was that we were updating PC here, and then gem5 was ++ // updating it again in tick(), causing it to skip instructions ++ // printf("[GEM5-BEBOP] Instruction complete, letting gem5 advance PC\n"); ++ // fflush(stdout); +} + +} // namespace RiscvISA @@ -236,15 +305,16 @@ index 0000000000..3e09b67199 + +#endif // 
__ARCH_RISCV_CUSTOM_INST_HH__ diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa -index 6235b34aee..678c3db2ea 100644 +index 6235b34aee..04724897b8 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa -@@ -6360,6 +6360,22 @@ decode QUADRANT default Unknown::unknown() { +@@ -6360,6 +6360,23 @@ decode QUADRANT default Unknown::unknown() { } } +- 0x1e: M5Op::M5Op(); + // Custom instructions (bebop extension) -+ // custom-0 (opcode 0x0b), custom-1 (opcode 0x2b), custom-2 (opcode 0x5b) ++ // custom-0 (opcode 0x0b), custom-1 (opcode 0x2b), custom-2 (opcode 0x5b), custom-3 (opcode 0x7b) + format ROp { + 0x02: bebop_custom0({{ + handleRiscvCustomInstruction(xc->tcBase(), machInst, this); @@ -255,11 +325,12 @@ index 6235b34aee..678c3db2ea 100644 + 0x16: bebop_custom2({{ + handleRiscvCustomInstruction(xc->tcBase(), machInst, this); + }}); ++ 0x1e: bebop_custom3({{ ++ handleRiscvCustomInstruction(xc->tcBase(), machInst, this); ++ }}); + } + -+ // M5Op uses 0x1e which conflicts with custom-3 (opcode 0x7b) -+ // Keep M5Op for now, custom-3 handled in unknown fault if needed - 0x1e: M5Op::M5Op(); ++ 0x1f: M5Op::M5Op(); } } diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa @@ -274,3 +345,23 @@ index b4be0e4ac6..077953e8cc 100644 #include "arch/riscv/insts/mem.hh" #include "arch/riscv/insts/pseudo.hh" #include "arch/riscv/insts/standard.hh" +diff --git a/util/m5/src/abi/riscv/m5op.S b/util/m5/src/abi/riscv/m5op.S +index 1b0376a131..0d6035b718 100644 +--- a/util/m5/src/abi/riscv/m5op.S ++++ b/util/m5/src/abi/riscv/m5op.S +@@ -39,13 +39,13 @@ + #include + + // riscv pseudo instructions have bit 1:0 (QUADRANT) = 0x3, +-// bit 6:2 (OPCODE5) = 0x1e, and bit 31:25 (M5FUNC) specifies ++// bit 6:2 (OPCODE5) = 0x1f, and bit 31:25 (M5FUNC) specifies + // the function performed by pseudo instruction + + .macro m5op_func, name, func + .globl \name + \name: +- .long 0x0000007b | (\func << 25) ++ .long 0x0000007f 
| (\func << 25) + ret + .endm + diff --git a/host/gem5/install-gem5.sh b/host/gem5/install-gem5.sh index 4f04e4f..8c9ed97 100755 --- a/host/gem5/install-gem5.sh +++ b/host/gem5/install-gem5.sh @@ -24,7 +24,7 @@ cmake --build ${HOST_BUILD} --target bebop_ipc -j$(nproc) cd ${GEM5_ROOT} # Apply the patch to gem5 -# git apply ${SCRIPT_DIR}/bebop.patch +git apply ${SCRIPT_DIR}/bebop.patch # We need to update the patch in this way if we make changes to gem5 # git add -A && git diff --cached > ../bebop.patch diff --git a/host/gem5/riscv-se.py b/host/gem5/riscv-se.py index 9e8d26d..b372b80 100644 --- a/host/gem5/riscv-se.py +++ b/host/gem5/riscv-se.py @@ -33,13 +33,15 @@ system.clk_domain.voltage_domain = VoltageDomain() # Set memory mode and range -# system.mem_mode = "timing" -system.mem_mode = "atomic" +# system.mem_mode = "atomic" +system.mem_mode = "timing" system.mem_ranges = [AddrRange("8GiB")] # Create CPU +# system.cpu = AtomicSimpleCPU() # system.cpu = RiscvTimingSimpleCPU() -system.cpu = AtomicSimpleCPU() +system.cpu = RiscvMinorCPU() +# system.cpu = RiscvO3CPU() # Create memory bus system.membus = SystemXBar() From b968685fcc49735361f2031be633042a5f643b86 Mon Sep 17 00:00:00 2001 From: dyy <1533208939@qq.com> Date: Sun, 8 Feb 2026 21:50:13 +0800 Subject: [PATCH 2/3] fix mvinmvout --- bebop/src/arch/buckyball/bank.rs | 69 +++-- bebop/src/arch/buckyball/mem_ctrl.rs | 366 ++++++++++++++---------- bebop/src/arch/buckyball/rob.rs | 21 +- bebop/src/arch/buckyball/rs.rs | 20 +- bebop/src/arch/buckyball/scoreboard.rs | 26 ++ bebop/src/arch/buckyball/tdma_loader.rs | 91 ++++-- bebop/src/arch/buckyball/tdma_storer.rs | 70 +++-- bebop/src/arch/buckyball/vecball.rs | 11 + 8 files changed, 433 insertions(+), 241 deletions(-) diff --git a/bebop/src/arch/buckyball/bank.rs b/bebop/src/arch/buckyball/bank.rs index 273fad1..e8e2331 100644 --- a/bebop/src/arch/buckyball/bank.rs +++ b/bebop/src/arch/buckyball/bank.rs @@ -116,34 +116,38 @@ impl Bank { impl DevsModel for Bank { fn 
events_ext(&mut self, incoming_message: &ModelMessage, services: &mut Services) -> Result<(), SimulationError> { if incoming_message.port_name == self.write_bank_req_port { - let value: (u64, u64, Vec) = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - - let vbank_id = value.0; - let start_addr = value.1; - let data_u64 = value.2; - - let mut data_vec = Vec::new(); - for i in (0..data_u64.len()).step_by(2) { - if i + 1 < data_u64.len() { - let lo = data_u64[i]; - let hi = data_u64[i + 1]; - data_vec.push((hi as u128) << 64 | (lo as u128)); + match serde_json::from_str::<(u64, u64, Vec)>(&incoming_message.content) { + Ok(value) => { + let vbank_id = value.0; + let start_addr = value.1; + let data_u64 = value.2; + + let mut data_vec = Vec::new(); + for i in (0..data_u64.len()).step_by(2) { + if i + 1 < data_u64.len() { + let lo = data_u64[i]; + let hi = data_u64[i + 1]; + data_vec.push((hi as u128) << 64 | (lo as u128)); + } + } + + if vbank_id < self.banks.len() as u64 { + self.banks[vbank_id as usize].write_batch(start_addr, &data_vec); + self.sync_bank_data(); + + model_record!( + self, + services, + "write_bank", + format!("id={}, count={}", vbank_id, data_vec.len()) + ); + } + }, + Err(_) => { + // Failed to deserialize write request, skipping this request } } - if vbank_id < self.banks.len() as u64 { - self.banks[vbank_id as usize].write_batch(start_addr, &data_vec); - self.sync_bank_data(); - - model_record!( - self, - services, - "write_bank", - format!("id={}, count={}", vbank_id, data_vec.len()) - ); - } - return Ok(()); } @@ -159,10 +163,17 @@ impl DevsModel for Bank { }); for data_vec in ready_responses { - messages.push(ModelMessage { - content: serde_json::to_string(&data_vec).map_err(|_| SimulationError::InvalidModelState)?, - port_name: self.read_bank_resp_port.clone(), - }); + match serde_json::to_string(&data_vec) { + Ok(content) => { + messages.push(ModelMessage { + content, + port_name: 
self.read_bank_resp_port.clone(), + }); + }, + Err(_) => { + // Failed to serialize read response, skipping this response + } + } } self.until_next_event = INFINITY; diff --git a/bebop/src/arch/buckyball/mem_ctrl.rs b/bebop/src/arch/buckyball/mem_ctrl.rs index db9820a..dcdf6b4 100644 --- a/bebop/src/arch/buckyball/mem_ctrl.rs +++ b/bebop/src/arch/buckyball/mem_ctrl.rs @@ -71,111 +71,127 @@ impl DevsModel for MemController { fn events_ext(&mut self, incoming_message: &ModelMessage, services: &mut Services) -> Result<(), SimulationError> { // Handle write requests from TDMA (multi-cycle) if incoming_message.port_name == self.tdma_write_req_port { - // Parse request: (rob_id, vbank_id, start_addr, data_u64) - let value: (u64, u64, u64, Vec) = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - let rob_id = value.0; - let vbank_id = value.1; - let start_addr = value.2; - let data_count = value.3.len() / 2; - - // Convert vbank_id to pbank_id using BMT - let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { - if pbank_ids.is_empty() { - vbank_id - } else { - pbank_ids[0] + match serde_json::from_str::<(u64, u64, u64, Vec)>(&incoming_message.content) { + Ok(value) => { + let rob_id = value.0; + let vbank_id = value.1; + let start_addr = value.2; + let data_count = value.3.len() / 2; + + // Convert vbank_id to pbank_id using BMT + let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { + if pbank_ids.is_empty() { + vbank_id + } else { + pbank_ids[0] + } + } else { + vbank_id + }; + + // Check dependency + if scoreboard::check_dependency(pbank_id, rob_id) { + // No dependency, can proceed immediately + self + .write_request_queue + .push(("tdma".to_string(), incoming_message.content.clone())); + } else { + // Has dependency, add to scoreboard + scoreboard::add_to_scoreboard(rob_id, pbank_id, "tdma".to_string(), incoming_message.content.clone()); + } + + self.records.push(ModelRecord { + time: 
services.global_time(), + action: "enqueue_tdma_write".to_string(), + subject: format!( + "rob_id={}, bank={}, addr={}, count={}", + rob_id, vbank_id, start_addr, data_count + ), + }); + + self.until_next_event = 1.0; + }, + Err(_) => { + // Failed to deserialize TDMA write request, skipping this request } - } else { - vbank_id - }; - - // Check dependency - if scoreboard::check_dependency(pbank_id, rob_id) { - // No dependency, can proceed immediately - self - .write_request_queue - .push(("tdma".to_string(), incoming_message.content.clone())); - } else { - // Has dependency, add to scoreboard - scoreboard::add_to_scoreboard(rob_id, pbank_id, "tdma".to_string(), incoming_message.content.clone()); } - - self.records.push(ModelRecord { - time: services.global_time(), - action: "enqueue_tdma_write".to_string(), - subject: format!( - "rob_id={}, bank={}, addr={}, count={}", - rob_id, vbank_id, start_addr, data_count - ), - }); - - self.until_next_event = 1.0; return Ok(()); } // Handle write requests from VectorBall (multi-cycle) if incoming_message.port_name == self.vball_write_req_port { - // Parse request: (rob_id, vbank_id, start_addr, data_u64) - let value: (u64, u64, u64, Vec) = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - let rob_id = value.0; - let vbank_id = value.1; - let start_addr = value.2; - let data_count = value.3.len() / 2; - - // Convert vbank_id to pbank_id using BMT - let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { - if pbank_ids.is_empty() { - vbank_id - } else { - pbank_ids[0] + match serde_json::from_str::<(u64, u64, u64, Vec)>(&incoming_message.content) { + Ok(value) => { + let rob_id = value.0; + let vbank_id = value.1; + let start_addr = value.2; + let data_count = value.3.len() / 2; + + // Convert vbank_id to pbank_id using BMT + let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { + if pbank_ids.is_empty() { + vbank_id + } else { + pbank_ids[0] + } + } 
else { + vbank_id + }; + + // Check dependency + if scoreboard::check_dependency(pbank_id, rob_id) { + // No dependency, can proceed immediately + self + .write_request_queue + .push(("vecball".to_string(), incoming_message.content.clone())); + } else { + // Has dependency, add to scoreboard + scoreboard::add_to_scoreboard( + rob_id, + pbank_id, + "vecball".to_string(), + incoming_message.content.clone(), + ); + } + + self.records.push(ModelRecord { + time: services.global_time(), + action: "enqueue_vball_write".to_string(), + subject: format!( + "rob_id={}, bank={}, addr={}, count={}", + rob_id, vbank_id, start_addr, data_count + ), + }); + + self.until_next_event = 1.0; + }, + Err(_) => { + // Failed to deserialize VectorBall write request, skipping } - } else { - vbank_id - }; - - // Check dependency - if scoreboard::check_dependency(pbank_id, rob_id) { - // No dependency, can proceed immediately - self - .write_request_queue - .push(("vecball".to_string(), incoming_message.content.clone())); - } else { - // Has dependency, add to scoreboard - scoreboard::add_to_scoreboard( - rob_id, - pbank_id, - "vecball".to_string(), - incoming_message.content.clone(), - ); } - - self.records.push(ModelRecord { - time: services.global_time(), - action: "enqueue_vball_write".to_string(), - subject: format!( - "rob_id={}, bank={}, addr={}, count={}", - rob_id, vbank_id, start_addr, data_count - ), - }); - - self.until_next_event = 1.0; return Ok(()); } // Handle read responses from Bank - forward to the correct source (multi-cycle) if incoming_message.port_name == self.bank_read_resp_port { - let data_vec: Vec = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - - // Get source from queue (FIFO) - if let Some(source) = READ_SOURCE_QUEUE.lock().unwrap().pop() { - READ_RESPONSE_QUEUE - .lock() - .unwrap() - .push(ReadResponse { source, data: data_vec }); - self.until_next_event = 1.0; + match 
serde_json::from_str::>(&incoming_message.content) { + Ok(data_vec) => { + // Get source from queue (FIFO) + if let Some(source) = READ_SOURCE_QUEUE.lock().unwrap().pop() { + let source_clone = source.clone(); + let data_len = data_vec.len(); + + READ_RESPONSE_QUEUE + .lock() + .unwrap() + .push(ReadResponse { source, data: data_vec }); + + self.until_next_event = 1.0; + } + }, + Err(_) => { + // Failed to deserialize bank read response, skipping + } } return Ok(()); } @@ -197,89 +213,107 @@ impl DevsModel for MemController { self.vball_read_resp_port.clone() }; - messages.push(ModelMessage { - content: serde_json::to_string(&resp.data).map_err(|_| SimulationError::InvalidModelState)?, - port_name: response_port, - }); - - self.records.push(ModelRecord { - time: services.global_time(), - action: "forward_read_resp".to_string(), - subject: format!("to {}", resp.source), - }); - - // Schedule next event - if !READ_RESPONSE_QUEUE.lock().unwrap().is_empty() - || !self.write_request_queue.is_empty() - || scoreboard::get_pending_count() > 0 - { - self.until_next_event = 1.0; - } else { - self.until_next_event = INFINITY; + match serde_json::to_string(&resp.data) { + Ok(content) => { + messages.push(ModelMessage { + content, + port_name: response_port, + }); + + self.records.push(ModelRecord { + time: services.global_time(), + action: "forward_read_resp".to_string(), + subject: format!("to {}", resp.source), + }); + }, + Err(_) => { + // Failed to serialize read response, skipping + } } + // Schedule next event + self.until_next_event = 1.0; return Ok(messages); } // Check scoreboard for ready requests (each cycle, unified judgment) let ready_request = scoreboard::get_one_ready_request(); if let Some((rob_id, pbank_id, source, json_content)) = ready_request { - self.write_request_queue.push((source, json_content)); + if !json_content.is_empty() { + self.write_request_queue.push((source, json_content)); + self.until_next_event = 1.0; + } else { + // Skipping empty request 
from scoreboard + } } // Process one write request if available if !self.write_request_queue.is_empty() { let (source, json_content) = self.write_request_queue.remove(0); - // Parse request: (rob_id, vbank_id, start_addr, data_u64) - let value: (u64, u64, u64, Vec) = - serde_json::from_str(&json_content).map_err(|_| SimulationError::InvalidModelState)?; - let rob_id = value.0; - let vbank_id = value.1; - let start_addr = value.2; - let data_u64 = value.3; - - // Convert vbank_id to pbank_id using BMT - // Use first pbank_id if vbank maps to multiple pbanks - let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { - if pbank_ids.is_empty() { - vbank_id - } else { - pbank_ids[0] + match serde_json::from_str::<(u64, u64, u64, Vec)>(&json_content) { + Ok(value) => { + let rob_id = value.0; + let vbank_id = value.1; + let start_addr = value.2; + let data_u64 = value.3; + + // Convert vbank_id to pbank_id using BMT + // Use first pbank_id if vbank maps to multiple pbanks + let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { + if pbank_ids.is_empty() { + vbank_id + } else { + pbank_ids[0] + } + } else { + vbank_id + }; + + // Mark as in-flight + scoreboard::mark_in_flight(pbank_id, rob_id); + + // Re-encode with pbank_id (remove rob_id for bank) + let request = (pbank_id, start_addr, data_u64); + match serde_json::to_string(&request) { + Ok(new_content) => { + messages.push(ModelMessage { + content: new_content, + port_name: self.bank_write_req_port.clone(), + }); + + self.records.push(ModelRecord { + time: services.global_time(), + action: "forward_write_req".to_string(), + subject: format!( + "from {}, rob_id={}, vbank={}->pbank={}", + source, rob_id, vbank_id, pbank_id + ), + }); + + // Bank write is synchronous (single cycle), mark as completed immediately + scoreboard::mark_completed(pbank_id); + + // Check if there are ready read requests that can now proceed (unified judgment each cycle) + let ready_read = 
scoreboard::get_one_ready_read_request(); + if let Some((read_rob_id, read_pbank_id, read_start_addr, read_count, read_source)) = ready_read { + READ_SOURCE_QUEUE.lock().unwrap().push(read_source.clone()); + request_read_bank(read_pbank_id, read_start_addr, read_count); + self.until_next_event = 1.0; + } + }, + Err(_) => { + // Failed to serialize bank write request, skipping + // Mark as completed to avoid blocking + scoreboard::mark_completed(pbank_id); + self.until_next_event = 1.0; + } + } + }, + Err(_) => { + // Failed to deserialize write request, skipping + self.until_next_event = 1.0; } - } else { - vbank_id - }; - - // Mark as in-flight - scoreboard::mark_in_flight(pbank_id, rob_id); - - // Re-encode with pbank_id (remove rob_id for bank) - let request = (pbank_id, start_addr, data_u64); - let new_content = serde_json::to_string(&request).map_err(|_| SimulationError::InvalidModelState)?; - - messages.push(ModelMessage { - content: new_content, - port_name: self.bank_write_req_port.clone(), - }); - - self.records.push(ModelRecord { - time: services.global_time(), - action: "forward_write_req".to_string(), - subject: format!( - "from {}, rob_id={}, vbank={}->pbank={}", - source, rob_id, vbank_id, pbank_id - ), - }); - - // Bank write is synchronous (single cycle), mark as completed immediately - scoreboard::mark_completed(pbank_id); - - // Check if there are ready read requests that can now proceed (unified judgment each cycle) - let ready_read = scoreboard::get_one_ready_read_request(); - if let Some((read_rob_id, read_pbank_id, read_start_addr, read_count, read_source)) = ready_read { - READ_SOURCE_QUEUE.lock().unwrap().push(read_source.clone()); - request_read_bank(read_pbank_id, read_start_addr, read_count); } } @@ -327,6 +361,20 @@ impl SerializableModel for MemController { } } +/// Check if MemController has any pending operations +/// Returns true if write_request_queue is empty and READ_RESPONSE_QUEUE is empty +pub fn is_mem_ctrl_idle() -> bool { + 
let read_response_queue_empty = { + let queue = READ_RESPONSE_QUEUE.lock().unwrap(); + queue.is_empty() + }; + let read_source_queue_empty = { + let queue = READ_SOURCE_QUEUE.lock().unwrap(); + queue.is_empty() + }; + read_response_queue_empty && read_source_queue_empty +} + pub fn request_read_bank_for_tdma(vbank_id: u64, start_addr: u64, count: u64, rob_id: u64) { // Convert vbank_id to pbank_id using BMT // Use first pbank_id if vbank maps to multiple pbanks diff --git a/bebop/src/arch/buckyball/rob.rs b/bebop/src/arch/buckyball/rob.rs index b0b6ae1..244ccf7 100644 --- a/bebop/src/arch/buckyball/rob.rs +++ b/bebop/src/arch/buckyball/rob.rs @@ -13,6 +13,11 @@ use crate::arch::buckyball::mset::MSET_INST_CAN_ISSUE; use crate::arch::buckyball::tdma_loader::MVIN_INST_CAN_ISSUE; use crate::arch::buckyball::tdma_storer::MVOUT_INST_CAN_ISSUE; use crate::arch::buckyball::vecball::VECBALL_INST_CAN_ISSUE; +use crate::arch::buckyball::scoreboard; +use crate::arch::buckyball::mem_ctrl; +use crate::arch::buckyball::tdma_loader; +use crate::arch::buckyball::tdma_storer; +use crate::arch::buckyball::vecball; #[derive(PartialEq, Debug, Clone, Serialize, Deserialize)] enum EntryStatus { @@ -94,9 +99,19 @@ impl DevsModel for Rob { fn events_int(&mut self, services: &mut Services) -> Result, SimulationError> { if is_empty(&mut self.rob_buffer) { if FENCE_CSR.load(Ordering::Relaxed) { - FENCE_CSR.store(false, Ordering::Relaxed); - send_cmd_response(0u64); - self.until_next_event = INFINITY; + let all_idle = scoreboard::is_all_memory_complete() + && mem_ctrl::is_mem_ctrl_idle() + && tdma_loader::is_tdma_loader_idle() + && tdma_storer::is_tdma_storer_idle() + && vecball::is_vecball_idle(); + + if all_idle { + FENCE_CSR.store(false, Ordering::Relaxed); + send_cmd_response(0u64); + self.until_next_event = INFINITY; + } else { + self.until_next_event = 1.0; + } } } else { self.until_next_event = 1.0; diff --git a/bebop/src/arch/buckyball/rs.rs b/bebop/src/arch/buckyball/rs.rs index 
3b04a49..720042f 100644 --- a/bebop/src/arch/buckyball/rs.rs +++ b/bebop/src/arch/buckyball/rs.rs @@ -64,27 +64,36 @@ impl DevsModel for Rs { } } - fn events_int(&mut self, _services: &mut Services) -> Result, SimulationError> { + fn events_int(&mut self, services: &mut Services) -> Result, SimulationError> { + let mut remaining_instructions = Vec::new(); for inst in self.inst_buffer.drain(..) { match inst.funct { 23 => { if MSET_INST_CAN_ISSUE.load(Ordering::Relaxed) { receive_mset_inst(inst.xs1, inst.xs2, inst.rob_id); + } else { + remaining_instructions.push(inst); } }, 24 => { if MVIN_INST_CAN_ISSUE.load(Ordering::Relaxed) { receive_mvin_inst(inst.xs1, inst.xs2, inst.rob_id); + } else { + remaining_instructions.push(inst); } }, 25 => { if MVOUT_INST_CAN_ISSUE.load(Ordering::Relaxed) { receive_mvout_inst(inst.xs1, inst.xs2, inst.rob_id); + } else { + remaining_instructions.push(inst); } }, 30 => { if VECBALL_INST_CAN_ISSUE.load(Ordering::Relaxed) { receive_vecball_inst(inst.xs1, inst.xs2, inst.rob_id); + } else { + remaining_instructions.push(inst); } }, _ => { @@ -93,7 +102,14 @@ impl DevsModel for Rs { } } - self.until_next_event = INFINITY; + self.inst_buffer = remaining_instructions; + + if !self.inst_buffer.is_empty() { + self.until_next_event = 1.0; + } else { + self.until_next_event = INFINITY; + } + Ok(Vec::new()) } diff --git a/bebop/src/arch/buckyball/scoreboard.rs b/bebop/src/arch/buckyball/scoreboard.rs index ae74c8a..894204b 100644 --- a/bebop/src/arch/buckyball/scoreboard.rs +++ b/bebop/src/arch/buckyball/scoreboard.rs @@ -287,3 +287,29 @@ pub fn get_pending_count() -> usize { 0 } } + +/// Get number of pending read requests in read scoreboard +pub fn get_pending_read_count() -> usize { + let read_scoreboard_opt = READ_SCOREBOARD.lock().unwrap(); + if let Some(ref read_scoreboard) = *read_scoreboard_opt { + read_scoreboard.values().map(|v| v.len()).sum() + } else { + 0 + } +} + +/// Get number of in-flight requests +pub fn get_in_flight_count() -> 
usize { + let in_flight_opt = IN_FLIGHT_REQUESTS.lock().unwrap(); + if let Some(ref in_flight) = *in_flight_opt { + in_flight.len() + } else { + 0 + } +} + +/// Check if all memory operations are complete +/// Returns true if there are no pending requests, no pending read requests, and no in-flight requests +pub fn is_all_memory_complete() -> bool { + get_pending_count() == 0 && get_pending_read_count() == 0 && get_in_flight_count() == 0 +} diff --git a/bebop/src/arch/buckyball/tdma_loader.rs b/bebop/src/arch/buckyball/tdma_loader.rs index e64cda7..6e544be 100644 --- a/bebop/src/arch/buckyball/tdma_loader.rs +++ b/bebop/src/arch/buckyball/tdma_loader.rs @@ -26,6 +26,8 @@ struct MvinInstData { static MVIN_INST_DATA: Mutex> = Mutex::new(None); +static TDMA_LOADER_STATE: Mutex = Mutex::new(TdmaLoaderState::Idle); + #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] enum TdmaLoaderState { Idle, @@ -82,6 +84,7 @@ impl DevsModel for TdmaLoader { if self.state == TdmaLoaderState::Wait { // Write request has been accepted, move to Active self.state = TdmaLoaderState::Active; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Active; self.until_next_event = 0.0; } Ok(()) @@ -119,6 +122,7 @@ impl DevsModel for TdmaLoader { scoreboard::reserve_write_request(inst.rob_id, pbank_id); self.state = TdmaLoaderState::Wait; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Wait; self.until_next_event = 1.0; } else { self.until_next_event = INFINITY; @@ -129,33 +133,51 @@ impl DevsModel for TdmaLoader { // Read DRAM data and send write request let mut data_u64 = Vec::new(); for i in 0..self.depth { - let dram_addr = self.base_dram_addr + i * 16 * self.stride; + // 当stride=0时,使用默认步长1,避免所有数据都从同一个地址读取 + let stride = if self.stride == 0 { 1 } else { self.stride }; + // 每次读取16字节数据,步长16 + let dram_addr = self.base_dram_addr + i * 16 * stride; let (data_lo, data_hi) = dma_read_dram(dram_addr); data_u64.push(data_lo); data_u64.push(data_hi); } let request = 
(self.rob_id, self.vbank_id, 0u64, data_u64); - messages.push(ModelMessage { - content: serde_json::to_string(&request).map_err(|_| SimulationError::InvalidModelState)?, - port_name: self.write_bank_req_port.clone(), - }); - - model_record!( - self, - services, - "write_bank", - format!("id={}, count={}", self.vbank_id, self.depth) - ); + match serde_json::to_string(&request) { + Ok(content) => { + messages.push(ModelMessage { + content, + port_name: self.write_bank_req_port.clone(), + }); + + model_record!( + self, + services, + "write_bank", + format!("id={}, count={}", self.vbank_id, self.depth) + ); + }, + Err(e) => { + println!("[ERROR] Failed to serialize TDMA write request: {:?}, skipping", e); + // Mark as completed to avoid blocking + self.state = TdmaLoaderState::Complete; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Complete; + self.until_next_event = 0.0; + return Ok(messages); + } + } - // Wait state: until_next_event should always be 1.0 - // This state waits for external event (write completion) - self.until_next_event = 1.0; + // 直接转换到Active状态,不等待MemController的响应 + // 因为MemController的设计是同步处理写请求的 + self.state = TdmaLoaderState::Active; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Active; + self.until_next_event = 0.0; }, TdmaLoaderState::Active => { // Write request has been accepted, now wait for transfer latency self.until_next_event = self.transfer_latency * self.depth as f64; self.state = TdmaLoaderState::Complete; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Complete; }, TdmaLoaderState::Complete => { messages.push(ModelMessage { @@ -167,6 +189,7 @@ impl DevsModel for TdmaLoader { MVIN_INST_CAN_ISSUE.store(true, Ordering::Relaxed); self.state = TdmaLoaderState::Idle; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Idle; self.until_next_event = INFINITY; }, } @@ -182,9 +205,6 @@ impl DevsModel for TdmaLoader { if self.state == TdmaLoaderState::Idle && MVIN_INST_DATA.lock().unwrap().is_some() { return 0.0; 
} - if self.state == TdmaLoaderState::Wait { - return 1.0; - } self.until_next_event } } @@ -211,10 +231,11 @@ impl SerializableModel for TdmaLoader { /// --- Helper Functions --- /// ------------------------------------------------------------ fn decode_inst(xs1: u64, xs2: u64) -> (u64, u64, u64, u64) { - let base_dram_addr = (xs1 & 0xffffffff) as u64; - let stride = ((xs2 >> 24) & 0x3ff) as u64; - let depth = ((xs2 >> 8) & 0xffff) as u64; - let vbank_id = (xs2 & 0xff) as u64; + let base_dram_addr = xs1; // 使用完整的64位地址 + // 根据bb_mvin宏的定义解析参数:bank_id(5位) | depth(10位) | stride(19位) + let vbank_id = (xs2 & 0x1f) as u64; // 低5位 + let depth = ((xs2 >> 5) & 0x3ff) as u64; // 中间10位 + let stride = ((xs2 >> 15) & 0x7ffff) as u64; // 高19位 (base_dram_addr, stride, depth, vbank_id) } @@ -240,11 +261,29 @@ fn dma_read_dram(dram_addr: u64) -> (u64, u64) { let handler_opt = DMA_READ_HANDLER.lock().unwrap(); if let Some(handler) = handler_opt.as_ref() { let mut h = handler.lock().unwrap(); - let data = h.read(dram_addr, 16).unwrap_or(0); - let data_lo = data as u64; - let data_hi = (data >> 64) as u64; - (data_lo, data_hi) + // 直接使用DmaReadResp的原始数据结构,避免数据转换错误 + // 首先发送读取请求 + if h.send_read_request(dram_addr, 16).is_ok() { + // 然后接收响应,获取原始的data_lo和data_hi + match h.recv_read_response() { + Ok(data) => { + // 正确拆分u128为两个u64 + let data_lo = data as u64; + let data_hi = (data >> 64) as u64; + (data_lo, data_hi) + }, + Err(_) => { + (0, 0) + } + } + } else { + (0, 0) + } } else { (0, 0) } } + +pub fn is_tdma_loader_idle() -> bool { + *TDMA_LOADER_STATE.lock().unwrap() == TdmaLoaderState::Idle +} diff --git a/bebop/src/arch/buckyball/tdma_storer.rs b/bebop/src/arch/buckyball/tdma_storer.rs index 8eac685..a45d82b 100644 --- a/bebop/src/arch/buckyball/tdma_storer.rs +++ b/bebop/src/arch/buckyball/tdma_storer.rs @@ -27,6 +27,8 @@ struct MvoutInstData { static MVOUT_INST_DATA: Mutex> = Mutex::new(None); +static TDMA_STORER_STATE: Mutex = Mutex::new(TdmaStorerState::Idle); + #[derive(Debug, 
Clone, Copy, PartialEq, Serialize, Deserialize)] enum TdmaStorerState { Idle, @@ -79,22 +81,31 @@ impl DevsModel for TdmaStorer { fn events_ext(&mut self, incoming_message: &ModelMessage, services: &mut Services) -> Result<(), SimulationError> { if incoming_message.port_name == self.read_bank_resp_port { if self.state != TdmaStorerState::Wait { - return Err(SimulationError::InvalidModelState); + return Ok(()); } - let data_vec: Vec = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - - for (i, &data) in data_vec.iter().enumerate() { - let dram_addr = self.base_dram_addr + (i as u64) * 16 * self.stride; - dma_write_dram(dram_addr, data); + match serde_json::from_str::>(&incoming_message.content) { + Ok(data_vec) => { + for (i, &data) in data_vec.iter().enumerate() { + let dram_addr = self.base_dram_addr + (i as u64) * 16 * self.stride; + dma_write_dram(dram_addr, data); + } + + model_record!(self, services, "write_dram", format!("count={}", data_vec.len())); + + self.state = TdmaStorerState::Active; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Active; + self.until_next_event = self.transfer_latency * self.depth as f64; + }, + Err(_) => { + // Reset state to Idle to allow new instructions + MVOUT_INST_CAN_ISSUE.store(true, Ordering::Relaxed); + self.state = TdmaStorerState::Idle; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Idle; + self.until_next_event = INFINITY; + } } - model_record!(self, services, "write_dram", format!("count={}", data_vec.len())); - - self.state = TdmaStorerState::Active; - self.until_next_event = self.transfer_latency * self.depth as f64; - return Ok(()); } @@ -130,6 +141,7 @@ impl DevsModel for TdmaStorer { ); self.state = TdmaStorerState::Wait; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Wait; self.until_next_event = 1.0; } }, @@ -150,18 +162,27 @@ impl DevsModel for TdmaStorer { }, TdmaStorerState::Active => { self.state = TdmaStorerState::Complete; + 
*TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Complete; self.until_next_event = 1.0; }, TdmaStorerState::Complete => { - messages.push(ModelMessage { - content: serde_json::to_string(&self.rob_id).map_err(|_| SimulationError::InvalidModelState)?, - port_name: self.commit_to_rob_port.clone(), - }); - - model_record!(self, services, "commit_mvout", format!("rob_id={}", self.rob_id)); + match serde_json::to_string(&self.rob_id) { + Ok(content) => { + messages.push(ModelMessage { + content, + port_name: self.commit_to_rob_port.clone(), + }); + + model_record!(self, services, "commit_mvout", format!("rob_id={}", self.rob_id)); + }, + Err(_) => { + // Failed to serialize commit message, skipping + } + } MVOUT_INST_CAN_ISSUE.store(true, Ordering::Relaxed); self.state = TdmaStorerState::Idle; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Idle; self.until_next_event = INFINITY; }, } @@ -206,10 +227,11 @@ impl SerializableModel for TdmaStorer { /// --- Helper Functions --- /// ------------------------------------------------------------ fn decode_inst(xs1: u64, xs2: u64) -> (u64, u64, u64, u64) { - let base_dram_addr = (xs1 & 0xffffffff) as u64; - let stride = ((xs2 >> 24) & 0x3ff) as u64; - let depth = ((xs2 >> 8) & 0xffff) as u64; - let vbank_id = (xs2 & 0xff) as u64; + let base_dram_addr = xs1; // 使用完整的64位地址 + // 根据bb_mvin宏的定义解析参数:bank_id(5位) | depth(10位) | stride(19位) + let vbank_id = (xs2 & 0x1f) as u64; // 低5位 + let depth = ((xs2 >> 5) & 0x3ff) as u64; // 中间10位 + let stride = ((xs2 >> 15) & 0x7ffff) as u64; // 高19位 (base_dram_addr, stride, depth, vbank_id) } @@ -242,3 +264,7 @@ fn dma_write_dram(dram_addr: u64, data: u128) { let _ = h.write(dram_addr, data, 16); } } + +pub fn is_tdma_storer_idle() -> bool { + *TDMA_STORER_STATE.lock().unwrap() == TdmaStorerState::Idle +} diff --git a/bebop/src/arch/buckyball/vecball.rs b/bebop/src/arch/buckyball/vecball.rs index 99a8710..7b884a3 100644 --- a/bebop/src/arch/buckyball/vecball.rs +++ 
b/bebop/src/arch/buckyball/vecball.rs @@ -22,6 +22,8 @@ struct VecballInstData { static VECBALL_INST_DATA: Mutex> = Mutex::new(None); +static VECBALL_STATE: Mutex = Mutex::new(VecBallState::Idle); + // VectorBall states for matrix multiplication pipeline #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] enum VecBallState { @@ -118,6 +120,7 @@ impl DevsModel for VectorBall { // Now request operand 2 self.state = VecBallState::WaitOp2; + *VECBALL_STATE.lock().unwrap() = VecBallState::WaitOp2; self.until_next_event = 1.0; }, VecBallState::WaitOp2 => { @@ -132,6 +135,7 @@ impl DevsModel for VectorBall { // Start computing self.state = VecBallState::Computing; + *VECBALL_STATE.lock().unwrap() = VecBallState::Computing; self.until_next_event = self.compute_latency; }, _ => {}, @@ -160,6 +164,7 @@ impl DevsModel for VectorBall { // Start by requesting operand 1 (all 16 elements at once) self.state = VecBallState::WaitOp1; + *VECBALL_STATE.lock().unwrap() = VecBallState::WaitOp1; self.until_next_event = 1.0; self.records.push(ModelRecord { @@ -242,6 +247,7 @@ impl DevsModel for VectorBall { // Move to wait for write response self.state = VecBallState::WaitWriteResp; + *VECBALL_STATE.lock().unwrap() = VecBallState::WaitWriteResp; self.until_next_event = self.write_latency; }, VecBallState::WaitWriteResp => { @@ -267,6 +273,7 @@ impl DevsModel for VectorBall { }); self.state = VecBallState::Idle; + *VECBALL_STATE.lock().unwrap() = VecBallState::Idle; self.until_next_event = 1.0; VECBALL_INST_CAN_ISSUE.store(true, Ordering::Relaxed); } @@ -341,3 +348,7 @@ pub fn receive_vecball_inst(xs1: u64, xs2: u64, rob_id: u64) { // Mark as busy VECBALL_INST_CAN_ISSUE.store(false, Ordering::Relaxed); } + +pub fn is_vecball_idle() -> bool { + *VECBALL_STATE.lock().unwrap() == VecBallState::Idle +} From 139e6972d3eaf5084456c493a964ac93c8fd6eca Mon Sep 17 00:00:00 2001 From: dyy <1533208939@qq.com> Date: Sun, 8 Feb 2026 22:19:14 +0800 Subject: [PATCH 3/3] fix mvinmvout --- 
bebop/src/arch/buckyball/bank.rs | 69 +++-- bebop/src/arch/buckyball/mem_ctrl.rs | 366 ++++++++++++++---------- bebop/src/arch/buckyball/rob.rs | 21 +- bebop/src/arch/buckyball/rs.rs | 20 +- bebop/src/arch/buckyball/scoreboard.rs | 26 ++ bebop/src/arch/buckyball/tdma_loader.rs | 91 ++++-- bebop/src/arch/buckyball/tdma_storer.rs | 70 +++-- bebop/src/arch/buckyball/vecball.rs | 11 + 8 files changed, 433 insertions(+), 241 deletions(-) diff --git a/bebop/src/arch/buckyball/bank.rs b/bebop/src/arch/buckyball/bank.rs index 273fad1..e8e2331 100644 --- a/bebop/src/arch/buckyball/bank.rs +++ b/bebop/src/arch/buckyball/bank.rs @@ -116,34 +116,38 @@ impl Bank { impl DevsModel for Bank { fn events_ext(&mut self, incoming_message: &ModelMessage, services: &mut Services) -> Result<(), SimulationError> { if incoming_message.port_name == self.write_bank_req_port { - let value: (u64, u64, Vec) = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - - let vbank_id = value.0; - let start_addr = value.1; - let data_u64 = value.2; - - let mut data_vec = Vec::new(); - for i in (0..data_u64.len()).step_by(2) { - if i + 1 < data_u64.len() { - let lo = data_u64[i]; - let hi = data_u64[i + 1]; - data_vec.push((hi as u128) << 64 | (lo as u128)); + match serde_json::from_str::<(u64, u64, Vec)>(&incoming_message.content) { + Ok(value) => { + let vbank_id = value.0; + let start_addr = value.1; + let data_u64 = value.2; + + let mut data_vec = Vec::new(); + for i in (0..data_u64.len()).step_by(2) { + if i + 1 < data_u64.len() { + let lo = data_u64[i]; + let hi = data_u64[i + 1]; + data_vec.push((hi as u128) << 64 | (lo as u128)); + } + } + + if vbank_id < self.banks.len() as u64 { + self.banks[vbank_id as usize].write_batch(start_addr, &data_vec); + self.sync_bank_data(); + + model_record!( + self, + services, + "write_bank", + format!("id={}, count={}", vbank_id, data_vec.len()) + ); + } + }, + Err(_) => { + // Failed to deserialize write request, 
skipping this request } } - if vbank_id < self.banks.len() as u64 { - self.banks[vbank_id as usize].write_batch(start_addr, &data_vec); - self.sync_bank_data(); - - model_record!( - self, - services, - "write_bank", - format!("id={}, count={}", vbank_id, data_vec.len()) - ); - } - return Ok(()); } @@ -159,10 +163,17 @@ impl DevsModel for Bank { }); for data_vec in ready_responses { - messages.push(ModelMessage { - content: serde_json::to_string(&data_vec).map_err(|_| SimulationError::InvalidModelState)?, - port_name: self.read_bank_resp_port.clone(), - }); + match serde_json::to_string(&data_vec) { + Ok(content) => { + messages.push(ModelMessage { + content, + port_name: self.read_bank_resp_port.clone(), + }); + }, + Err(_) => { + // Failed to serialize read response, skipping this response + } + } } self.until_next_event = INFINITY; diff --git a/bebop/src/arch/buckyball/mem_ctrl.rs b/bebop/src/arch/buckyball/mem_ctrl.rs index db9820a..dcdf6b4 100644 --- a/bebop/src/arch/buckyball/mem_ctrl.rs +++ b/bebop/src/arch/buckyball/mem_ctrl.rs @@ -71,111 +71,127 @@ impl DevsModel for MemController { fn events_ext(&mut self, incoming_message: &ModelMessage, services: &mut Services) -> Result<(), SimulationError> { // Handle write requests from TDMA (multi-cycle) if incoming_message.port_name == self.tdma_write_req_port { - // Parse request: (rob_id, vbank_id, start_addr, data_u64) - let value: (u64, u64, u64, Vec) = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - let rob_id = value.0; - let vbank_id = value.1; - let start_addr = value.2; - let data_count = value.3.len() / 2; - - // Convert vbank_id to pbank_id using BMT - let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { - if pbank_ids.is_empty() { - vbank_id - } else { - pbank_ids[0] + match serde_json::from_str::<(u64, u64, u64, Vec)>(&incoming_message.content) { + Ok(value) => { + let rob_id = value.0; + let vbank_id = value.1; + let start_addr = 
value.2; + let data_count = value.3.len() / 2; + + // Convert vbank_id to pbank_id using BMT + let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { + if pbank_ids.is_empty() { + vbank_id + } else { + pbank_ids[0] + } + } else { + vbank_id + }; + + // Check dependency + if scoreboard::check_dependency(pbank_id, rob_id) { + // No dependency, can proceed immediately + self + .write_request_queue + .push(("tdma".to_string(), incoming_message.content.clone())); + } else { + // Has dependency, add to scoreboard + scoreboard::add_to_scoreboard(rob_id, pbank_id, "tdma".to_string(), incoming_message.content.clone()); + } + + self.records.push(ModelRecord { + time: services.global_time(), + action: "enqueue_tdma_write".to_string(), + subject: format!( + "rob_id={}, bank={}, addr={}, count={}", + rob_id, vbank_id, start_addr, data_count + ), + }); + + self.until_next_event = 1.0; + }, + Err(_) => { + // Failed to deserialize TDMA write request, skipping this request } - } else { - vbank_id - }; - - // Check dependency - if scoreboard::check_dependency(pbank_id, rob_id) { - // No dependency, can proceed immediately - self - .write_request_queue - .push(("tdma".to_string(), incoming_message.content.clone())); - } else { - // Has dependency, add to scoreboard - scoreboard::add_to_scoreboard(rob_id, pbank_id, "tdma".to_string(), incoming_message.content.clone()); } - - self.records.push(ModelRecord { - time: services.global_time(), - action: "enqueue_tdma_write".to_string(), - subject: format!( - "rob_id={}, bank={}, addr={}, count={}", - rob_id, vbank_id, start_addr, data_count - ), - }); - - self.until_next_event = 1.0; return Ok(()); } // Handle write requests from VectorBall (multi-cycle) if incoming_message.port_name == self.vball_write_req_port { - // Parse request: (rob_id, vbank_id, start_addr, data_u64) - let value: (u64, u64, u64, Vec) = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - let rob_id = 
value.0; - let vbank_id = value.1; - let start_addr = value.2; - let data_count = value.3.len() / 2; - - // Convert vbank_id to pbank_id using BMT - let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { - if pbank_ids.is_empty() { - vbank_id - } else { - pbank_ids[0] + match serde_json::from_str::<(u64, u64, u64, Vec)>(&incoming_message.content) { + Ok(value) => { + let rob_id = value.0; + let vbank_id = value.1; + let start_addr = value.2; + let data_count = value.3.len() / 2; + + // Convert vbank_id to pbank_id using BMT + let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { + if pbank_ids.is_empty() { + vbank_id + } else { + pbank_ids[0] + } + } else { + vbank_id + }; + + // Check dependency + if scoreboard::check_dependency(pbank_id, rob_id) { + // No dependency, can proceed immediately + self + .write_request_queue + .push(("vecball".to_string(), incoming_message.content.clone())); + } else { + // Has dependency, add to scoreboard + scoreboard::add_to_scoreboard( + rob_id, + pbank_id, + "vecball".to_string(), + incoming_message.content.clone(), + ); + } + + self.records.push(ModelRecord { + time: services.global_time(), + action: "enqueue_vball_write".to_string(), + subject: format!( + "rob_id={}, bank={}, addr={}, count={}", + rob_id, vbank_id, start_addr, data_count + ), + }); + + self.until_next_event = 1.0; + }, + Err(_) => { + // Failed to deserialize VectorBall write request, skipping } - } else { - vbank_id - }; - - // Check dependency - if scoreboard::check_dependency(pbank_id, rob_id) { - // No dependency, can proceed immediately - self - .write_request_queue - .push(("vecball".to_string(), incoming_message.content.clone())); - } else { - // Has dependency, add to scoreboard - scoreboard::add_to_scoreboard( - rob_id, - pbank_id, - "vecball".to_string(), - incoming_message.content.clone(), - ); } - - self.records.push(ModelRecord { - time: services.global_time(), - action: "enqueue_vball_write".to_string(), - subject: format!( 
- "rob_id={}, bank={}, addr={}, count={}", - rob_id, vbank_id, start_addr, data_count - ), - }); - - self.until_next_event = 1.0; return Ok(()); } // Handle read responses from Bank - forward to the correct source (multi-cycle) if incoming_message.port_name == self.bank_read_resp_port { - let data_vec: Vec = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - - // Get source from queue (FIFO) - if let Some(source) = READ_SOURCE_QUEUE.lock().unwrap().pop() { - READ_RESPONSE_QUEUE - .lock() - .unwrap() - .push(ReadResponse { source, data: data_vec }); - self.until_next_event = 1.0; + match serde_json::from_str::>(&incoming_message.content) { + Ok(data_vec) => { + // Get source from queue (FIFO) + if let Some(source) = READ_SOURCE_QUEUE.lock().unwrap().pop() { + let source_clone = source.clone(); + let data_len = data_vec.len(); + + READ_RESPONSE_QUEUE + .lock() + .unwrap() + .push(ReadResponse { source, data: data_vec }); + + self.until_next_event = 1.0; + } + }, + Err(_) => { + // Failed to deserialize bank read response, skipping + } } return Ok(()); } @@ -197,89 +213,107 @@ impl DevsModel for MemController { self.vball_read_resp_port.clone() }; - messages.push(ModelMessage { - content: serde_json::to_string(&resp.data).map_err(|_| SimulationError::InvalidModelState)?, - port_name: response_port, - }); - - self.records.push(ModelRecord { - time: services.global_time(), - action: "forward_read_resp".to_string(), - subject: format!("to {}", resp.source), - }); - - // Schedule next event - if !READ_RESPONSE_QUEUE.lock().unwrap().is_empty() - || !self.write_request_queue.is_empty() - || scoreboard::get_pending_count() > 0 - { - self.until_next_event = 1.0; - } else { - self.until_next_event = INFINITY; + match serde_json::to_string(&resp.data) { + Ok(content) => { + messages.push(ModelMessage { + content, + port_name: response_port, + }); + + self.records.push(ModelRecord { + time: services.global_time(), + action: 
"forward_read_resp".to_string(), + subject: format!("to {}", resp.source), + }); + }, + Err(_) => { + // Failed to serialize read response, skipping + } } + // Schedule next event + self.until_next_event = 1.0; return Ok(messages); } // Check scoreboard for ready requests (each cycle, unified judgment) let ready_request = scoreboard::get_one_ready_request(); if let Some((rob_id, pbank_id, source, json_content)) = ready_request { - self.write_request_queue.push((source, json_content)); + if !json_content.is_empty() { + self.write_request_queue.push((source, json_content)); + self.until_next_event = 1.0; + } else { + // Skipping empty request from scoreboard + } } // Process one write request if available if !self.write_request_queue.is_empty() { let (source, json_content) = self.write_request_queue.remove(0); - // Parse request: (rob_id, vbank_id, start_addr, data_u64) - let value: (u64, u64, u64, Vec) = - serde_json::from_str(&json_content).map_err(|_| SimulationError::InvalidModelState)?; - let rob_id = value.0; - let vbank_id = value.1; - let start_addr = value.2; - let data_u64 = value.3; - - // Convert vbank_id to pbank_id using BMT - // Use first pbank_id if vbank maps to multiple pbanks - let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { - if pbank_ids.is_empty() { - vbank_id - } else { - pbank_ids[0] + match serde_json::from_str::<(u64, u64, u64, Vec)>(&json_content) { + Ok(value) => { + let rob_id = value.0; + let vbank_id = value.1; + let start_addr = value.2; + let data_u64 = value.3; + + // Convert vbank_id to pbank_id using BMT + // Use first pbank_id if vbank maps to multiple pbanks + let pbank_id = if let Some(pbank_ids) = get_pbank_ids(vbank_id) { + if pbank_ids.is_empty() { + vbank_id + } else { + pbank_ids[0] + } + } else { + vbank_id + }; + + // Mark as in-flight + scoreboard::mark_in_flight(pbank_id, rob_id); + + // Re-encode with pbank_id (remove rob_id for bank) + let request = (pbank_id, start_addr, data_u64); + match 
serde_json::to_string(&request) { + Ok(new_content) => { + messages.push(ModelMessage { + content: new_content, + port_name: self.bank_write_req_port.clone(), + }); + + self.records.push(ModelRecord { + time: services.global_time(), + action: "forward_write_req".to_string(), + subject: format!( + "from {}, rob_id={}, vbank={}->pbank={}", + source, rob_id, vbank_id, pbank_id + ), + }); + + // Bank write is synchronous (single cycle), mark as completed immediately + scoreboard::mark_completed(pbank_id); + + // Check if there are ready read requests that can now proceed (unified judgment each cycle) + let ready_read = scoreboard::get_one_ready_read_request(); + if let Some((read_rob_id, read_pbank_id, read_start_addr, read_count, read_source)) = ready_read { + READ_SOURCE_QUEUE.lock().unwrap().push(read_source.clone()); + request_read_bank(read_pbank_id, read_start_addr, read_count); + self.until_next_event = 1.0; + } + }, + Err(_) => { + // Failed to serialize bank write request, skipping + // Mark as completed to avoid blocking + scoreboard::mark_completed(pbank_id); + self.until_next_event = 1.0; + } + } + }, + Err(_) => { + // Failed to deserialize write request, skipping + self.until_next_event = 1.0; } - } else { - vbank_id - }; - - // Mark as in-flight - scoreboard::mark_in_flight(pbank_id, rob_id); - - // Re-encode with pbank_id (remove rob_id for bank) - let request = (pbank_id, start_addr, data_u64); - let new_content = serde_json::to_string(&request).map_err(|_| SimulationError::InvalidModelState)?; - - messages.push(ModelMessage { - content: new_content, - port_name: self.bank_write_req_port.clone(), - }); - - self.records.push(ModelRecord { - time: services.global_time(), - action: "forward_write_req".to_string(), - subject: format!( - "from {}, rob_id={}, vbank={}->pbank={}", - source, rob_id, vbank_id, pbank_id - ), - }); - - // Bank write is synchronous (single cycle), mark as completed immediately - scoreboard::mark_completed(pbank_id); - - // Check 
if there are ready read requests that can now proceed (unified judgment each cycle) - let ready_read = scoreboard::get_one_ready_read_request(); - if let Some((read_rob_id, read_pbank_id, read_start_addr, read_count, read_source)) = ready_read { - READ_SOURCE_QUEUE.lock().unwrap().push(read_source.clone()); - request_read_bank(read_pbank_id, read_start_addr, read_count); } } @@ -327,6 +361,20 @@ impl SerializableModel for MemController { } } +/// Check if MemController has any pending operations +/// Returns true if write_request_queue is empty and READ_RESPONSE_QUEUE is empty +pub fn is_mem_ctrl_idle() -> bool { + let read_response_queue_empty = { + let queue = READ_RESPONSE_QUEUE.lock().unwrap(); + queue.is_empty() + }; + let read_source_queue_empty = { + let queue = READ_SOURCE_QUEUE.lock().unwrap(); + queue.is_empty() + }; + read_response_queue_empty && read_source_queue_empty +} + pub fn request_read_bank_for_tdma(vbank_id: u64, start_addr: u64, count: u64, rob_id: u64) { // Convert vbank_id to pbank_id using BMT // Use first pbank_id if vbank maps to multiple pbanks diff --git a/bebop/src/arch/buckyball/rob.rs b/bebop/src/arch/buckyball/rob.rs index b0b6ae1..244ccf7 100644 --- a/bebop/src/arch/buckyball/rob.rs +++ b/bebop/src/arch/buckyball/rob.rs @@ -13,6 +13,11 @@ use crate::arch::buckyball::mset::MSET_INST_CAN_ISSUE; use crate::arch::buckyball::tdma_loader::MVIN_INST_CAN_ISSUE; use crate::arch::buckyball::tdma_storer::MVOUT_INST_CAN_ISSUE; use crate::arch::buckyball::vecball::VECBALL_INST_CAN_ISSUE; +use crate::arch::buckyball::scoreboard; +use crate::arch::buckyball::mem_ctrl; +use crate::arch::buckyball::tdma_loader; +use crate::arch::buckyball::tdma_storer; +use crate::arch::buckyball::vecball; #[derive(PartialEq, Debug, Clone, Serialize, Deserialize)] enum EntryStatus { @@ -94,9 +99,19 @@ impl DevsModel for Rob { fn events_int(&mut self, services: &mut Services) -> Result, SimulationError> { if is_empty(&mut self.rob_buffer) { if 
FENCE_CSR.load(Ordering::Relaxed) { - FENCE_CSR.store(false, Ordering::Relaxed); - send_cmd_response(0u64); - self.until_next_event = INFINITY; + let all_idle = scoreboard::is_all_memory_complete() + && mem_ctrl::is_mem_ctrl_idle() + && tdma_loader::is_tdma_loader_idle() + && tdma_storer::is_tdma_storer_idle() + && vecball::is_vecball_idle(); + + if all_idle { + FENCE_CSR.store(false, Ordering::Relaxed); + send_cmd_response(0u64); + self.until_next_event = INFINITY; + } else { + self.until_next_event = 1.0; + } } } else { self.until_next_event = 1.0; diff --git a/bebop/src/arch/buckyball/rs.rs b/bebop/src/arch/buckyball/rs.rs index 3b04a49..720042f 100644 --- a/bebop/src/arch/buckyball/rs.rs +++ b/bebop/src/arch/buckyball/rs.rs @@ -64,27 +64,36 @@ impl DevsModel for Rs { } } - fn events_int(&mut self, _services: &mut Services) -> Result, SimulationError> { + fn events_int(&mut self, services: &mut Services) -> Result, SimulationError> { + let mut remaining_instructions = Vec::new(); for inst in self.inst_buffer.drain(..) 
{ match inst.funct { 23 => { if MSET_INST_CAN_ISSUE.load(Ordering::Relaxed) { receive_mset_inst(inst.xs1, inst.xs2, inst.rob_id); + } else { + remaining_instructions.push(inst); } }, 24 => { if MVIN_INST_CAN_ISSUE.load(Ordering::Relaxed) { receive_mvin_inst(inst.xs1, inst.xs2, inst.rob_id); + } else { + remaining_instructions.push(inst); } }, 25 => { if MVOUT_INST_CAN_ISSUE.load(Ordering::Relaxed) { receive_mvout_inst(inst.xs1, inst.xs2, inst.rob_id); + } else { + remaining_instructions.push(inst); } }, 30 => { if VECBALL_INST_CAN_ISSUE.load(Ordering::Relaxed) { receive_vecball_inst(inst.xs1, inst.xs2, inst.rob_id); + } else { + remaining_instructions.push(inst); } }, _ => { @@ -93,7 +102,14 @@ impl DevsModel for Rs { } } - self.until_next_event = INFINITY; + self.inst_buffer = remaining_instructions; + + if !self.inst_buffer.is_empty() { + self.until_next_event = 1.0; + } else { + self.until_next_event = INFINITY; + } + Ok(Vec::new()) } diff --git a/bebop/src/arch/buckyball/scoreboard.rs b/bebop/src/arch/buckyball/scoreboard.rs index ae74c8a..894204b 100644 --- a/bebop/src/arch/buckyball/scoreboard.rs +++ b/bebop/src/arch/buckyball/scoreboard.rs @@ -287,3 +287,29 @@ pub fn get_pending_count() -> usize { 0 } } + +/// Get number of pending read requests in read scoreboard +pub fn get_pending_read_count() -> usize { + let read_scoreboard_opt = READ_SCOREBOARD.lock().unwrap(); + if let Some(ref read_scoreboard) = *read_scoreboard_opt { + read_scoreboard.values().map(|v| v.len()).sum() + } else { + 0 + } +} + +/// Get number of in-flight requests +pub fn get_in_flight_count() -> usize { + let in_flight_opt = IN_FLIGHT_REQUESTS.lock().unwrap(); + if let Some(ref in_flight) = *in_flight_opt { + in_flight.len() + } else { + 0 + } +} + +/// Check if all memory operations are complete +/// Returns true if there are no pending requests, no pending read requests, and no in-flight requests +pub fn is_all_memory_complete() -> bool { + get_pending_count() == 0 && 
get_pending_read_count() == 0 && get_in_flight_count() == 0 +} diff --git a/bebop/src/arch/buckyball/tdma_loader.rs b/bebop/src/arch/buckyball/tdma_loader.rs index e64cda7..6e544be 100644 --- a/bebop/src/arch/buckyball/tdma_loader.rs +++ b/bebop/src/arch/buckyball/tdma_loader.rs @@ -26,6 +26,8 @@ struct MvinInstData { static MVIN_INST_DATA: Mutex> = Mutex::new(None); +static TDMA_LOADER_STATE: Mutex = Mutex::new(TdmaLoaderState::Idle); + #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] enum TdmaLoaderState { Idle, @@ -82,6 +84,7 @@ impl DevsModel for TdmaLoader { if self.state == TdmaLoaderState::Wait { // Write request has been accepted, move to Active self.state = TdmaLoaderState::Active; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Active; self.until_next_event = 0.0; } Ok(()) @@ -119,6 +122,7 @@ impl DevsModel for TdmaLoader { scoreboard::reserve_write_request(inst.rob_id, pbank_id); self.state = TdmaLoaderState::Wait; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Wait; self.until_next_event = 1.0; } else { self.until_next_event = INFINITY; @@ -129,33 +133,51 @@ impl DevsModel for TdmaLoader { // Read DRAM data and send write request let mut data_u64 = Vec::new(); for i in 0..self.depth { - let dram_addr = self.base_dram_addr + i * 16 * self.stride; + // When stride == 0, fall back to a default stride of 1 so that not every read comes from the same address + let stride = if self.stride == 0 { 1 } else { self.stride }; + // Each read fetches 16 bytes, so addresses advance in 16-byte steps + let dram_addr = self.base_dram_addr + i * 16 * stride; let (data_lo, data_hi) = dma_read_dram(dram_addr); data_u64.push(data_lo); data_u64.push(data_hi); } let request = (self.rob_id, self.vbank_id, 0u64, data_u64); - messages.push(ModelMessage { - content: serde_json::to_string(&request).map_err(|_| SimulationError::InvalidModelState)?, - port_name: self.write_bank_req_port.clone(), - }); - - model_record!( - self, - services, - "write_bank", - format!("id={}, count={}", self.vbank_id, self.depth) - ); + match serde_json::to_string(&request) {
Ok(content) => { + messages.push(ModelMessage { + content, + port_name: self.write_bank_req_port.clone(), + }); + + model_record!( + self, + services, + "write_bank", + format!("id={}, count={}", self.vbank_id, self.depth) + ); + }, + Err(e) => { + println!("[ERROR] Failed to serialize TDMA write request: {:?}, skipping", e); + // Mark as completed to avoid blocking + self.state = TdmaLoaderState::Complete; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Complete; + self.until_next_event = 0.0; + return Ok(messages); + } + } - // Wait state: until_next_event should always be 1.0 - // This state waits for external event (write completion) - self.until_next_event = 1.0; + // Transition directly to the Active state without waiting for a response from MemController + // because MemController is designed to handle write requests synchronously + self.state = TdmaLoaderState::Active; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Active; + self.until_next_event = 0.0; }, TdmaLoaderState::Active => { // Write request has been accepted, now wait for transfer latency self.until_next_event = self.transfer_latency * self.depth as f64; self.state = TdmaLoaderState::Complete; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Complete; }, TdmaLoaderState::Complete => { messages.push(ModelMessage { @@ -167,6 +189,7 @@ impl DevsModel for TdmaLoader { MVIN_INST_CAN_ISSUE.store(true, Ordering::Relaxed); self.state = TdmaLoaderState::Idle; + *TDMA_LOADER_STATE.lock().unwrap() = TdmaLoaderState::Idle; self.until_next_event = INFINITY; }, } @@ -182,9 +205,6 @@ impl DevsModel for TdmaLoader { if self.state == TdmaLoaderState::Idle && MVIN_INST_DATA.lock().unwrap().is_some() { return 0.0; } - if self.state == TdmaLoaderState::Wait { - return 1.0; - } self.until_next_event } } @@ -211,10 +231,11 @@ impl SerializableModel for TdmaLoader { /// --- Helper Functions --- /// ------------------------------------------------------------ fn decode_inst(xs1: u64, xs2: u64) -> (u64, u64, u64, u64) { - let base_dram_addr = (xs1 & 0xffffffff) as u64; - let stride = ((xs2 >> 24)
& 0x3ff) as u64; - let depth = ((xs2 >> 8) & 0xffff) as u64; - let vbank_id = (xs2 & 0xff) as u64; + let base_dram_addr = xs1; // use the full 64-bit address + // Decode the fields following the bb_mvin macro layout: bank_id (5 bits) | depth (10 bits) | stride (19 bits) + let vbank_id = (xs2 & 0x1f) as u64; // low 5 bits + let depth = ((xs2 >> 5) & 0x3ff) as u64; // middle 10 bits + let stride = ((xs2 >> 15) & 0x7ffff) as u64; // high 19 bits (base_dram_addr, stride, depth, vbank_id) } @@ -240,11 +261,29 @@ fn dma_read_dram(dram_addr: u64) -> (u64, u64) { let handler_opt = DMA_READ_HANDLER.lock().unwrap(); if let Some(handler) = handler_opt.as_ref() { let mut h = handler.lock().unwrap(); - let data = h.read(dram_addr, 16).unwrap_or(0); - let data_lo = data as u64; - let data_hi = (data >> 64) as u64; - (data_lo, data_hi) + // Use the raw DmaReadResp data directly to avoid data-conversion errors + // First send the read request + if h.send_read_request(dram_addr, 16).is_ok() { + // Then receive the response to obtain the raw data_lo and data_hi + match h.recv_read_response() { + Ok(data) => { + // Split the u128 into two u64 halves + let data_lo = data as u64; + let data_hi = (data >> 64) as u64; + (data_lo, data_hi) + }, + Err(_) => { + (0, 0) + } + } + } else { + (0, 0) + } } else { (0, 0) } } + +pub fn is_tdma_loader_idle() -> bool { + *TDMA_LOADER_STATE.lock().unwrap() == TdmaLoaderState::Idle +} diff --git a/bebop/src/arch/buckyball/tdma_storer.rs b/bebop/src/arch/buckyball/tdma_storer.rs index 8eac685..a45d82b 100644 --- a/bebop/src/arch/buckyball/tdma_storer.rs +++ b/bebop/src/arch/buckyball/tdma_storer.rs @@ -27,6 +27,8 @@ struct MvoutInstData { static MVOUT_INST_DATA: Mutex> = Mutex::new(None); +static TDMA_STORER_STATE: Mutex = Mutex::new(TdmaStorerState::Idle); + #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] enum TdmaStorerState { Idle, @@ -79,22 +81,31 @@ impl DevsModel for TdmaStorer { fn events_ext(&mut self, incoming_message: &ModelMessage, services: &mut Services) -> Result<(), SimulationError> { if incoming_message.port_name == self.read_bank_resp_port { if self.state != TdmaStorerState::Wait { - return
Err(SimulationError::InvalidModelState); + return Ok(()); } - let data_vec: Vec = - serde_json::from_str(&incoming_message.content).map_err(|_| SimulationError::InvalidModelState)?; - - for (i, &data) in data_vec.iter().enumerate() { - let dram_addr = self.base_dram_addr + (i as u64) * 16 * self.stride; - dma_write_dram(dram_addr, data); + match serde_json::from_str::>(&incoming_message.content) { + Ok(data_vec) => { + for (i, &data) in data_vec.iter().enumerate() { + let dram_addr = self.base_dram_addr + (i as u64) * 16 * self.stride; + dma_write_dram(dram_addr, data); + } + + model_record!(self, services, "write_dram", format!("count={}", data_vec.len())); + + self.state = TdmaStorerState::Active; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Active; + self.until_next_event = self.transfer_latency * self.depth as f64; + }, + Err(_) => { + // Reset state to Idle to allow new instructions + MVOUT_INST_CAN_ISSUE.store(true, Ordering::Relaxed); + self.state = TdmaStorerState::Idle; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Idle; + self.until_next_event = INFINITY; + } } - model_record!(self, services, "write_dram", format!("count={}", data_vec.len())); - - self.state = TdmaStorerState::Active; - self.until_next_event = self.transfer_latency * self.depth as f64; - return Ok(()); } @@ -130,6 +141,7 @@ impl DevsModel for TdmaStorer { ); self.state = TdmaStorerState::Wait; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Wait; self.until_next_event = 1.0; } }, @@ -150,18 +162,27 @@ impl DevsModel for TdmaStorer { }, TdmaStorerState::Active => { self.state = TdmaStorerState::Complete; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Complete; self.until_next_event = 1.0; }, TdmaStorerState::Complete => { - messages.push(ModelMessage { - content: serde_json::to_string(&self.rob_id).map_err(|_| SimulationError::InvalidModelState)?, - port_name: self.commit_to_rob_port.clone(), - }); - - model_record!(self, services, "commit_mvout", 
format!("rob_id={}", self.rob_id)); + match serde_json::to_string(&self.rob_id) { + Ok(content) => { + messages.push(ModelMessage { + content, + port_name: self.commit_to_rob_port.clone(), + }); + + model_record!(self, services, "commit_mvout", format!("rob_id={}", self.rob_id)); + }, + Err(_) => { + // Failed to serialize commit message, skipping + } + } MVOUT_INST_CAN_ISSUE.store(true, Ordering::Relaxed); self.state = TdmaStorerState::Idle; + *TDMA_STORER_STATE.lock().unwrap() = TdmaStorerState::Idle; self.until_next_event = INFINITY; }, } @@ -206,10 +227,11 @@ impl SerializableModel for TdmaStorer { /// --- Helper Functions --- /// ------------------------------------------------------------ fn decode_inst(xs1: u64, xs2: u64) -> (u64, u64, u64, u64) { - let base_dram_addr = (xs1 & 0xffffffff) as u64; - let stride = ((xs2 >> 24) & 0x3ff) as u64; - let depth = ((xs2 >> 8) & 0xffff) as u64; - let vbank_id = (xs2 & 0xff) as u64; + let base_dram_addr = xs1; // use the full 64-bit address + // Decode the fields following the bb_mvin macro layout: bank_id (5 bits) | depth (10 bits) | stride (19 bits) + let vbank_id = (xs2 & 0x1f) as u64; // low 5 bits + let depth = ((xs2 >> 5) & 0x3ff) as u64; // middle 10 bits + let stride = ((xs2 >> 15) & 0x7ffff) as u64; // high 19 bits (base_dram_addr, stride, depth, vbank_id) } @@ -242,3 +264,7 @@ fn dma_write_dram(dram_addr: u64, data: u128) { let _ = h.write(dram_addr, data, 16); } } + +pub fn is_tdma_storer_idle() -> bool { + *TDMA_STORER_STATE.lock().unwrap() == TdmaStorerState::Idle +} diff --git a/bebop/src/arch/buckyball/vecball.rs b/bebop/src/arch/buckyball/vecball.rs index 99a8710..7b884a3 100644 --- a/bebop/src/arch/buckyball/vecball.rs +++ b/bebop/src/arch/buckyball/vecball.rs @@ -22,6 +22,8 @@ struct VecballInstData { static VECBALL_INST_DATA: Mutex> = Mutex::new(None); +static VECBALL_STATE: Mutex = Mutex::new(VecBallState::Idle); + // VectorBall states for matrix multiplication pipeline #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] enum VecBallState { @@ -118,6 +120,7 @@ impl DevsModel
for VectorBall { // Now request operand 2 self.state = VecBallState::WaitOp2; + *VECBALL_STATE.lock().unwrap() = VecBallState::WaitOp2; self.until_next_event = 1.0; }, VecBallState::WaitOp2 => { @@ -132,6 +135,7 @@ impl DevsModel for VectorBall { // Start computing self.state = VecBallState::Computing; + *VECBALL_STATE.lock().unwrap() = VecBallState::Computing; self.until_next_event = self.compute_latency; }, _ => {}, @@ -160,6 +164,7 @@ impl DevsModel for VectorBall { // Start by requesting operand 1 (all 16 elements at once) self.state = VecBallState::WaitOp1; + *VECBALL_STATE.lock().unwrap() = VecBallState::WaitOp1; self.until_next_event = 1.0; self.records.push(ModelRecord { @@ -242,6 +247,7 @@ impl DevsModel for VectorBall { // Move to wait for write response self.state = VecBallState::WaitWriteResp; + *VECBALL_STATE.lock().unwrap() = VecBallState::WaitWriteResp; self.until_next_event = self.write_latency; }, VecBallState::WaitWriteResp => { @@ -267,6 +273,7 @@ impl DevsModel for VectorBall { }); self.state = VecBallState::Idle; + *VECBALL_STATE.lock().unwrap() = VecBallState::Idle; self.until_next_event = 1.0; VECBALL_INST_CAN_ISSUE.store(true, Ordering::Relaxed); } @@ -341,3 +348,7 @@ pub fn receive_vecball_inst(xs1: u64, xs2: u64, rob_id: u64) { // Mark as busy VECBALL_INST_CAN_ISSUE.store(false, Ordering::Relaxed); } + +pub fn is_vecball_idle() -> bool { + *VECBALL_STATE.lock().unwrap() == VecBallState::Idle +}