diff --git a/Makefile b/Makefile
index 27f98f21..39830516 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ clean-deps:
 
 SW_HEX := sw/bin/helloworld.hex
 $(SW_HEX): sw/*.c sw/*.h sw/*.S sw/*.ld
-	$(MAKE) -C sw/ compile
+	$(MAKE) -C sw/ compile DEFINES="$(DEFINES)"
 
 ## Build all top-level programs in sw/
 software: $(SW_HEX)
diff --git a/rtl/tb_croc_soc.sv b/rtl/tb_croc_soc.sv
index b87f6646..06500d87 100644
--- a/rtl/tb_croc_soc.sv
+++ b/rtl/tb_croc_soc.sv
@@ -438,6 +438,14 @@ module tb_croc_soc #(
     /////////////////
 
     logic [31:0] tb_data;
+    // Free-running count of rising clk edges; written only by the always block below.
+    int unsigned cycle_count;
+    // Value of cycle_count sampled right after the core is resumed.
+    int unsigned cycle_start;
+
+    always @(posedge clk) begin
+        cycle_count <= cycle_count + 1;
+    end
 
     initial begin
         $timeformat(-9, 0, "ns", 12); // 1: scale (ns=-9), 2: decimals, 3: suffix, 4: print-field width
@@ -470,9 +478,12 @@ module tb_croc_soc #(
 
         // resume core
         jtag_resume();
+        // Snapshot instead of zeroing the counter: a blocking write here could be
+        // overwritten by the nonblocking increment scheduled in the same time step.
+        cycle_start = cycle_count;
         // wait for non-zero return value (written into core status register)
         $display("@%t | [CORE] Wait for end of code...", $time);
         jtag_wait_for_eoc(tb_data);
+        $display("Program cycles: %d", cycle_count - cycle_start);
 
         // finish simulation
         repeat(50) @(posedge clk);
diff --git a/sw/Makefile b/sw/Makefile
index f5b65cd9..52b04225 100644
--- a/sw/Makefile
+++ b/sw/Makefile
@@ -12,7 +12,7 @@ INCDIR ?= lib/inc
 
 # Toolchain
 RISCV_XLEN ?= 32
-RISCV_MARCH ?= rv$(RISCV_XLEN)i_zicsr
+RISCV_MARCH ?= rv$(RISCV_XLEN)im_zicsr
 RISCV_MABI ?= ilp32
 RISCV_PREFIX ?= riscv64-unknown-elf-
 RISCV_CC ?= $(RISCV_PREFIX)gcc
@@ -25,7 +25,9 @@ RISCV_LD ?= $(RISCV_PREFIX)ld
 RISCV_STRIP ?= $(RISCV_PREFIX)strip
 
 RISCV_FLAGS ?= -march=$(RISCV_MARCH) -mabi=$(RISCV_MABI) -mcmodel=medany -static -std=gnu99 -Os -nostdlib -fno-builtin -ffreestanding
-RISCV_CCFLAGS ?= $(RISCV_FLAGS) -Iinclude -I$(INCDIR) -I$(CURDIR)
+# Extra compiler flags appended to every compile, e.g. DEFINES=-DBASELINE (see lib/inc/mac.h)
+DEFINES ?=
+RISCV_CCFLAGS ?= $(RISCV_FLAGS) -Iinclude -I$(INCDIR) -I$(CURDIR) $(DEFINES)
 RISCV_LDFLAGS ?= -static -nostartfiles -lm -lgcc -lc $(RISCV_FLAGS)
 
 # all
diff --git a/sw/dot_product.c b/sw/dot_product.c
new file mode 100644
index 00000000..37728bb1
--- /dev/null
+++ 
b/sw/dot_product.c
@@ -0,0 +1,52 @@
+#include "mac.h"
+#include <stdint.h>
+
+// Returns the sum of x[i] * y[i] for i in [0, n), accumulated with MAC().
+int32_t dot_product(const int32_t *x, const int32_t *y, int n) {
+  int32_t acc = 0;
+  for (int i = 0; i < n; ++i) {
+    MAC(acc, x[i], y[i]);
+  }
+  return acc;
+}
+
+// a[i] = i + 1
+static const int32_t a[200] = {
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+    31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+    46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+    61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
+    76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+    91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
+    106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+    121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135,
+    136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
+    151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165,
+    166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
+    181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,
+    196, 197, 198, 199, 200};
+
+// b[i] = 200 - i
+static const int32_t b[200] = {
+    200, 199, 198, 197, 196, 195, 194, 193, 192, 191, 190, 189, 188, 187, 186,
+    185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 172, 171,
+    170, 169, 168, 167, 166, 165, 164, 163, 162, 161, 160, 159, 158, 157, 156,
+    155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 145, 144, 143, 142, 141,
+    140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 130, 129, 128, 127, 126,
+    125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111,
+    110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96,
+    95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81,
+    80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66,
+    65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51,
+    50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36,
+    35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
+    20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6,
+    5, 4, 3, 2, 1};
+
+int main(void) {
+  // Sum over i = 1..200 of i * (201 - i).
+  int32_t expected = 1353400;
+
+  return dot_product(a, b, 200) == expected ? 1 : -1;
+}
diff --git a/sw/lib/inc/mac.h b/sw/lib/inc/mac.h
index 1e84165e..00e0e1c2 100644
--- a/sw/lib/inc/mac.h
+++ b/sw/lib/inc/mac.h
@@ -6,8 +6,12 @@
 #define MAC_FUNCT3 0x0
 #define MAC_FUNCT7 0x9
 // R type for gnu assembler: opcode, func3, func7, rd, rs1, rs2
+#ifndef BASELINE
 #define MAC(a, b, c) \
   asm volatile(".insn r %1, %2, %3, %0, %4, %5" \
                : "+&r"(a) \
                : "i"(MAC_OPCODE), "i"(MAC_FUNCT3), "i"(MAC_FUNCT7), "r"(b), \
                  "r"(c))
+#else  // BASELINE: the same accumulate written in plain C, without the custom instruction
+#define MAC(a, b, c) ((a) += (b) * (c))
+#endif  // BASELINE
diff --git a/sw/matrix_vector_mul.c b/sw/matrix_vector_mul.c
new file mode 100644
index 00000000..accdefce
--- /dev/null
+++ b/sw/matrix_vector_mul.c
@@ -0,0 +1,81 @@
+#include "mac.h"
+#include <stdint.h>
+
+// Returns the sum of x[i] * y[i] for i in [0, n), accumulated with MAC().
+int32_t dot_product(const int32_t *x, const int32_t *y, int n) {
+  int32_t acc = 0;
+  for (int i = 0; i < n; ++i) {
+    MAC(acc, x[i], y[i]);
+  }
+  return acc;
+}
+
+// 5x100 matrix, mat[r][c] = (r+1) * (c+1)
+static const int32_t mat[5][100] = {
+    {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+     35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+     52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
+     69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
+     86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100},
+    {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+     32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60,
+     62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90,
+     92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120,
+     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150,
+     152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180,
+     182, 184, 186, 188, 190, 192, 194, 196, 198, 200},
+    {3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45,
+     48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90,
+     93, 96, 99, 102, 105, 108, 111, 114, 117, 120, 123, 126, 129, 132, 135,
+     138, 141, 144, 147, 150, 153, 156, 159, 162, 165, 168, 171, 174, 177, 180,
+     183, 186, 189, 192, 195, 198, 201, 204, 207, 210, 213, 216, 219, 222, 225,
+     228, 231, 234, 237, 240, 243, 246, 249, 252, 255, 258, 261, 264, 267, 270,
+     273, 276, 279, 282, 285, 288, 291, 294, 297, 300},
+    {4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
+     64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120,
+     124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180,
+     184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240,
+     244, 248, 252, 256, 260, 264, 268, 272, 276, 280, 284, 288, 292, 296, 300,
+     304, 308, 312, 316, 320, 324, 328, 332, 336, 340, 344, 348, 352, 356, 360,
+     364, 368, 372, 376, 380, 384, 388, 392, 396, 400},
+    {5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75,
+     80, 85, 90, 95, 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150,
+     155, 160, 165, 170, 175, 180, 185, 190, 195, 200, 205, 210, 215, 220, 225,
+     230, 235, 240, 245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300,
+     305, 310, 315, 320, 325, 330, 335, 340, 345, 350, 355, 360, 365, 370, 375,
+     380, 385, 390, 395, 400, 405, 410, 415, 420, 425, 430, 435, 440, 445, 450,
+     455, 460, 465, 470, 475, 480, 485, 490, 495, 500}};
+
+// 100-element vector, vec[c] = c+1
+static const int32_t vec[100] = {
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+    18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+    35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
+    69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
+    86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100};
+
+// Compute matrix-vector multiplication using dot_product
+void matvec(const int32_t m[5][100], const int32_t v[100], int32_t out[5]) {
+  for (int r = 0; r < 5; ++r) {
+    out[r] = dot_product(m[r], v, 100);
+  }
+}
+
+// Row r of mat is (r+1) * vec, so out[r] must equal (r+1) * 338350.
+static const int32_t expected[5] = {338350, 676700, 1015050, 1353400, 1691750};
+
+int main(void) {
+  int32_t out[5];
+
+  matvec(mat, vec, out);
+
+  // Check every row, not only the first, so a wrong result in rows 1..4 is caught.
+  for (int r = 0; r < 5; ++r) {
+    if (out[r] != expected[r]) {
+      return -1;
+    }
+  }
+  return 1;
+}
diff --git a/sw/simple-mac.c b/sw/simple-mac.c
index 10deeda1..8c9868c8 100644
--- a/sw/simple-mac.c
+++ b/sw/simple-mac.c
@@ -1,14 +1,51 @@
 #include "mac.h"
 #include <stdint.h>
 
+// Extremes of int32_t; the <stdint.h> constants avoid the undefined shift (1 << 31).
+#define MIN_INT INT32_MIN
+#define MAX_INT INT32_MAX
+
 static void mac_baseline(int32_t *a, int32_t b, int32_t c) { *a += b * c; }
 
-int main() {
-  int32_t a_base = 9;
-  int32_t a_mac = a_base;
-  int32_t b = 3;
-  int32_t c = 5;
-  mac_baseline(&a_base, b, c);
+// Number of the first failing test case (counted from 1), or -1 if none failed.
+static int failed_case = -1;
+// Number of test cases run so far.
+static int test_case = 0;
+
+// Computes a + b * c with both the C reference and MAC(), and records the
+// number of the first test case on which the two results differ.
+static void check_mac(int32_t a, int32_t b, int32_t c) {
+  int32_t a_mac = a;
+  ++test_case;
+  mac_baseline(&a, b, c);
   MAC(a_mac, b, c);
-  return a_base == a_mac ? 1 : -1;
+  if (a != a_mac && failed_case == -1) {
+    failed_case = test_case;
+  }
+}
+
+int main(void) {
+  // Standard positive numbers
+  check_mac(5, 10, 20);
+  // Zero multiplier
+  check_mac(100, 0, 50);
+  // Zero multiplicand
+  check_mac(10, 50, 0);
+  // Negative multiplicand
+  check_mac(7, 3, -4);
+  // Negative multiplier
+  check_mac(15, -2, 8);
+  // All zeros
+  check_mac(0, 0, 0);
+  // Large numbers
+  check_mac(100000, 20000, 3000);
+  // Negative acc
+  check_mac(-20, 2, 15);
+  // max positive
+  check_mac(0, MAX_INT, 1);
+  // max negative
+  check_mac(0, 1, MIN_INT);
+
+  // returns -1 on success, the number of the failed test case on failure
+  return failed_case;
 }