Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
/ds4
/ds4-server
/ds4-bench
/ds4-bench.exe
/ds4_native
/ds4_server_test
/ds4_test
/ds4flash.gguf
/TODO.md
/gguf/
*.o
*.exe
*.dSYM/
# Generated MSVC import lib for the Windows ROCm build (regenerated from the
# HIP SDK's libhipblas.dll by win/build-rocm.sh).
/win/third_party/hipblas.lib
/misc/
.*.swp
.DS_Store
60 changes: 58 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
CC ?= cc
UNAME_S := $(shell uname -s)

# Windows detection: under MinGW/MSYS `uname -s` reports a name such as
# MINGW64_NT-10.0 or MSYS_NT-10.0, so IS_WINDOWS is non-empty only there.
IS_WINDOWS := $(filter MINGW% MSYS%,$(UNAME_S))

# MinGW ships no `cc`. On Windows, when CC still has make's built-in "default"
# origin (nobody overrode it), fall back to gcc. The $(if ...) yields the
# origin of CC only on Windows and the empty string everywhere else.
ifeq ($(if $(IS_WINDOWS),$(origin CC)),default)
CC := gcc
endif

# Native-CPU compiler flag: -mcpu=native on Darwin, -march=native on every
# other host. `?=` leaves a value already supplied by the caller untouched.
NATIVE_CPU_FLAG ?= $(if $(filter Darwin,$(UNAME_S)),-mcpu=native,-march=native)

# Compiler and linker settings for the native Windows (MinGW-w64) CPU build.
# No extra -I/-include is required: ds4.c itself includes the dependency-free
# POSIX shim (ds4_win.h) behind #ifdef _WIN32.
# Flag order is significant: -fno-finite-math-only must stay after -ffast-math
# so that it is the later of the two on the command line.
WIN_CFLAGS ?= -O3 -ffast-math $(NATIVE_CPU_FLAG) \
              -std=c99 -D_GNU_SOURCE \
              -fno-finite-math-only \
              -DDS4_NO_GPU -D_CRT_SECURE_NO_WARNINGS
WIN_LDLIBS ?= -lm

# Default C and Objective-C compiler flags. Both share the same optimisation
# and warning options and pick up the host-specific $(NATIVE_CPU_FLAG); `?=`
# means a value provided by the caller replaces the whole default.
CFLAGS ?= -O3 -ffast-math $(NATIVE_CPU_FLAG) -Wall -Wextra -std=c99
OBJCFLAGS ?= -O3 -ffast-math $(NATIVE_CPU_FLAG) -Wall -Wextra -fobjc-arc

Expand Down Expand Up @@ -57,7 +73,7 @@ METAL_LDLIBS := $(LDLIBS)

endif

.PHONY: all help clean test cpu cuda cuda-spark cuda-generic cuda-regression
.PHONY: all help clean test cpu cuda cuda-spark cuda-generic cuda-regression windows-cpu

ifeq ($(UNAME_S),Darwin)
all: ds4 ds4-server ds4-bench
Expand Down Expand Up @@ -85,6 +101,46 @@ cpu: ds4_cli_cpu.o ds4_server_cpu.o ds4_bench_cpu.o linenoise.o rax.o $(CPU_CORE

cuda-regression:
@echo "cuda-regression requires a CUDA build"

else ifneq ($(IS_WINDOWS),)
# ---- Native Windows (MinGW-w64 / HIP-clang) -------------------------------
# CPU bench builds with MinGW. The GPU (ROCm/HIP) bench builds with the AMD HIP
# SDK for gfx1151. The CLI (linenoise/termios + sigaction) and server (BSD
# sockets/poll) still need Windows ports; see win/README.md.
#
# Windows ROCm/HIP build settings. hipcc.exe's .bat wrapper splits args on
# spaces, so the actual compile/link is delegated to win/build-rocm.sh, which
# relies on the SDK's default include search and a space-free import-lib dir.
# Both settings are `?=` defaults, so a value supplied by the caller wins; the
# windows-rocm recipe passes them to the script as environment variables.
# Note that the default ROCM_PATH contains a space and must stay quoted
# wherever it is expanded in a recipe.
ROCM_PATH ?= C:/Program Files/AMD/ROCm/7.1
ROCM_ARCH ?= gfx1151

# On native Windows the default goal only prints the target overview.
all: help

# Print the Windows target list, one printf argument per output line.
# $(ROCM_PATH) and $(ROCM_ARCH) are expanded by make before the shell runs,
# so the text shows the values currently in effect.
help:
	@printf '%s\n' \
		"DS4 build targets (native Windows):" \
		" make windows-cpu Build native Windows CPU ./ds4-bench.exe (MinGW)" \
		" make windows-rocm Build native Windows ROCm ./ds4-bench.exe (HIP, gfx1151)" \
		" make clean Remove build outputs" \
		"" \
		" windows-rocm uses the AMD HIP SDK (ROCM_PATH=$(ROCM_PATH)," \
		" ROCM_ARCH=$(ROCM_ARCH)). See win/README.md for the rocWMMA vendoring" \
		" step and run caveats." \
		"" \
		" ds4 (CLI) and ds4-server are not yet ported to Windows."

# `make windows-cpu` builds the native Windows CPU benchmark with MinGW.
windows-cpu: ds4-bench.exe

# Objects and headers of the MinGW CPU bench. Each object is the target of its
# own rule so that make tracks it: only the translation unit whose inputs
# changed is recompiled, the two compiles can run in parallel under -j, and no
# recipe writes a file other than its own target.
# NOTE(review): if rules for ds4_cpu.o / ds4_bench_cpu.o also exist outside
# this Windows branch, drop or rename one set to avoid "overriding recipe".
WIN_BENCH_OBJS := ds4_bench_cpu.o ds4_cpu.o
WIN_BENCH_HDRS := ds4.h ds4_gpu.h ds4_win.h

# Link step only; $^ keeps the object order listed in WIN_BENCH_OBJS.
ds4-bench.exe: $(WIN_BENCH_OBJS)
	$(CC) $(WIN_CFLAGS) -o $@ $^ $(WIN_LDLIBS)

# Core engine, CPU-only configuration (WIN_CFLAGS carries -DDS4_NO_GPU).
ds4_cpu.o: ds4.c $(WIN_BENCH_HDRS)
	$(CC) $(WIN_CFLAGS) -c -o $@ $<

# Benchmark driver.
ds4_bench_cpu.o: ds4_bench.c $(WIN_BENCH_HDRS)
	$(CC) $(WIN_CFLAGS) -c -o $@ $<

# Native Windows ROCm/HIP build of ds4-bench.exe (default arch gfx1151).
# make does not compile anything here itself: hipcc.exe's wrapper splits
# arguments on spaces, so the whole build is handed to win/build-rocm.sh.
# ROCM_PATH and ROCM_ARCH reach the script through its environment and are
# quoted because the default SDK path contains a space.
.PHONY: windows-rocm
windows-rocm:
	ROCM_PATH="$(ROCM_PATH)" \
	ROCM_ARCH="$(ROCM_ARCH)" \
	bash win/build-rocm.sh

else
all: help

Expand Down Expand Up @@ -194,5 +250,5 @@ test: ds4_test
./ds4_test

clean:
rm -f ds4 ds4-server ds4-bench ds4_cpu ds4_native ds4_server_test ds4_test *.o tests/cuda_long_context_smoke tests/cuda_long_context_smoke.o
rm -f ds4 ds4-server ds4-bench ds4_cpu ds4_native ds4_server_test ds4_test *.o *.exe tests/cuda_long_context_smoke tests/cuda_long_context_smoke.o

24 changes: 24 additions & 0 deletions ds4.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,43 @@
#include <inttypes.h>
#include <ctype.h>
#include <math.h>
#if defined(_WIN32) && defined(DS4_WIN_PTHREAD)
/* Native Windows GPU (HIP/MSVC-ABI) build: MSVC has no <pthread.h>; use the
* Win32 pthread shim. The MinGW CPU build (no DS4_WIN_PTHREAD) keeps real
* winpthreads, so its behavior is unchanged. */
#include "win/ds4_pthread_win.h"
#else
#include <pthread.h>
#endif
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
/* Native Windows CPU (MinGW-w64) and GPU (HIP/clang-MSVC) builds: a small
* dependency-free POSIX shim supplies mmap/flock/pread/sysconf/dprintf/
* fmemopen. See ds4_win.h. */
#include "ds4_win.h"
#include <sys/stat.h>
#include <stdarg.h>
#include <time.h>
#if defined(__MINGW32__)
#include <unistd.h> /* MinGW provides POSIX unistd surface */
#else
#include <io.h> /* MSVC-ABI build: read/write/close/lseek/isatty */
#include <process.h> /* getpid */
#include <direct.h>
#endif
#else
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <time.h>
#include <unistd.h>
#endif

#include "ds4.h"

Expand Down
6 changes: 6 additions & 0 deletions ds4_bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
#if defined(_WIN32) && !defined(__MINGW32__)
/* Native Windows GPU (HIP/clang-MSVC) build: MSVC lacks clock_gettime/
* CLOCK_MONOTONIC; the shim supplies them. The MinGW CPU build already has
* them via <time.h>, so it does not include the shim. See ds4_win.h. */
#include "ds4_win.h"
#endif

typedef struct {
const char *model_path;
Expand Down
Loading