Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
/ds4
/ds4-server
/ds4-bench
/ds4-bench.exe
/ds4_native
/ds4_server_test
/ds4_test
/ds4flash.gguf
/TODO.md
/gguf/
*.o
*.exe
*.dSYM/
/misc/
.*.swp
Expand Down
41 changes: 39 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
CC ?= cc
UNAME_S := $(shell uname -s)

# On MinGW/MSYS `uname -s` is e.g. MINGW64_NT-10.0 or MSYS_NT-10.0.
IS_WINDOWS := $(filter MINGW% MSYS%,$(UNAME_S))

# MinGW has no `cc`; default the compiler to gcc there (still overridable).
ifneq ($(IS_WINDOWS),)
ifeq ($(origin CC),default)
CC := gcc
endif
endif

ifeq ($(UNAME_S),Darwin)
NATIVE_CPU_FLAG ?= -mcpu=native
else
NATIVE_CPU_FLAG ?= -march=native
endif

# Native Windows (MinGW-w64) CPU build flags. ds4.c pulls in the dependency-free
# POSIX shim (ds4_win.h) behind #ifdef _WIN32; no extra -I/-include is needed.
WIN_CFLAGS ?= -O3 -ffast-math $(NATIVE_CPU_FLAG) -std=c99 -D_GNU_SOURCE \
-fno-finite-math-only -DDS4_NO_GPU -D_CRT_SECURE_NO_WARNINGS
WIN_LDLIBS ?= -lm
Comment on lines +20 to +24

CFLAGS ?= -O3 -ffast-math $(NATIVE_CPU_FLAG) -Wall -Wextra -std=c99
OBJCFLAGS ?= -O3 -ffast-math $(NATIVE_CPU_FLAG) -Wall -Wextra -fobjc-arc

Expand Down Expand Up @@ -57,7 +73,7 @@ METAL_LDLIBS := $(LDLIBS)

endif

.PHONY: all help clean test cpu cuda cuda-spark cuda-generic cuda-regression
.PHONY: all help clean test cpu cuda cuda-spark cuda-generic cuda-regression windows-cpu

ifeq ($(UNAME_S),Darwin)
all: ds4 ds4-server ds4-bench
Expand Down Expand Up @@ -85,6 +101,27 @@ cpu: ds4_cli_cpu.o ds4_server_cpu.o ds4_bench_cpu.o linenoise.o rax.o $(CPU_CORE

cuda-regression:
@echo "cuda-regression requires a CUDA build"

else ifneq ($(IS_WINDOWS),)
# ---- Native Windows (MinGW-w64) ------------------------------------------
# Only the CPU bench is portable today. The CLI (linenoise/termios + sigaction)
# and server (BSD sockets/poll) still need Windows ports; see win/README.md.
all: help

help:
@echo "DS4 build targets (native Windows / MinGW-w64):"
@echo " make windows-cpu Build native Windows CPU ./ds4-bench.exe"
@echo " make clean Remove build outputs"
@echo ""
@echo " ds4 (CLI) and ds4-server are not yet ported to Windows."

windows-cpu: ds4-bench.exe

ds4-bench.exe: ds4_bench.c ds4.c ds4.h ds4_gpu.h ds4_win.h
$(CC) $(WIN_CFLAGS) -c -o ds4_cpu.o ds4.c
$(CC) $(WIN_CFLAGS) -c -o ds4_bench_cpu.o ds4_bench.c
$(CC) $(WIN_CFLAGS) -o $@ ds4_bench_cpu.o ds4_cpu.o $(WIN_LDLIBS)

else
all: help

Expand Down Expand Up @@ -194,5 +231,5 @@ test: ds4_test
./ds4_test

clean:
rm -f ds4 ds4-server ds4-bench ds4_cpu ds4_native ds4_server_test ds4_test *.o tests/cuda_long_context_smoke tests/cuda_long_context_smoke.o
rm -f ds4 ds4-server ds4-bench ds4_cpu ds4_native ds4_server_test ds4_test *.o *.exe tests/cuda_long_context_smoke tests/cuda_long_context_smoke.o

10 changes: 10 additions & 0 deletions ds4.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,22 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
/* Native Windows (MinGW-w64) CPU build: a small dependency-free POSIX shim
* supplies mmap/flock/pread/sysconf/dprintf/fmemopen. See ds4_win.h. */
#include "ds4_win.h"
#include <sys/stat.h>
#include <stdarg.h>
#include <time.h>
#include <unistd.h>
#else
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <time.h>
#include <unistd.h>
#endif

#include "ds4.h"

Expand Down
232 changes: 232 additions & 0 deletions ds4_win.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
/* ds4_win.h — minimal POSIX compatibility layer for native Windows builds.
*
* Provides just the POSIX surface ds4.c relies on that MinGW/UCRT lacks:
* - mmap / munmap / madvise (read-only file mappings)
* - sysconf(_SC_NPROCESSORS_ONLN / _SC_PAGESIZE)
* - flock / fcntl(F_SETFD,FD_CLOEXEC) / pread / dprintf (instance lock)
* - fmemopen (fixed-buffer "wb"/"rb", temp-file backed with copy-back on close)
*
* Header-only, self-contained, no third-party deps. The whole body is guarded by
* _WIN32, so this header is inert on POSIX platforms. ds4.c includes it in place
* of <sys/mman.h> (and the other POSIX-only surface) behind #ifdef _WIN32, so the
* native MinGW-w64 CPU build needs no extra include/search-path flags. MinGW
* already provides pthread, clock_gettime and ftruncate.
*/
#ifndef DS4_WIN_H
#define DS4_WIN_H

#ifdef _WIN32

#include <windows.h>
#include <io.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/* ---- mmap ---------------------------------------------------------------- */
/* Protection and mapping flags accepted by the mmap() shim in this header.
 * They exist so that POSIX call sites compile; the shim always creates a
 * read-only view and does not inspect prot or flags. */
#define PROT_NONE 0x0
#define PROT_READ 0x1
#define PROT_WRITE 0x2
#define PROT_EXEC 0x4
#define MAP_SHARED 0x01
#define MAP_PRIVATE 0x02
/* Error return of mmap(), compared against by callers. */
#define MAP_FAILED ((void *)-1)
/* Advice values for posix_madvise()/madvise(); the shims in this header accept
 * and ignore them. */
#define POSIX_MADV_NORMAL 0
#define POSIX_MADV_RANDOM 1
#define POSIX_MADV_SEQUENTIAL 2
#define POSIX_MADV_WILLNEED 3
#define POSIX_MADV_DONTNEED 4
#define MADV_WILLNEED POSIX_MADV_WILLNEED

/* Selector names understood by the sysconf() shim; only defined here when the
 * toolchain headers have not already provided them. */
#ifndef _SC_PAGESIZE
#define _SC_PAGESIZE 0x1
#endif
#ifndef _SC_NPROCESSORS_ONLN
#define _SC_NPROCESSORS_ONLN 0x2
#endif

/* ---- file locking / fd flags -------------------------------------------- */
/* Command and flag consumed by the fcntl() shim in this header. */
#ifndef F_SETFD
#define F_SETFD 2
#endif
#ifndef FD_CLOEXEC
#define FD_CLOEXEC 1
#endif
/* Operation bits for the flock() shim. LOCK_SH is never tested explicitly:
 * a request without LOCK_EX or LOCK_UN results in a shared lock. */
#define LOCK_SH 1
#define LOCK_EX 2
#define LOCK_NB 4
#define LOCK_UN 8

/* mmap() replacement: map `length` bytes of the file behind `fd`, starting at
 * `offset`, as a read-only view.
 *
 * addr, prot and flags are accepted for source compatibility and ignored; the
 * section is always created PAGE_READONLY and viewed with FILE_MAP_READ.
 *
 * Returns the view address, or MAP_FAILED with errno set to EBADF (fd has no
 * OS handle) or ENOMEM (section or view could not be created). */
static inline void *mmap(void *addr, size_t length, int prot, int flags,
                         int fd, long long offset)
{
    (void)addr;
    (void)prot;
    (void)flags;

    HANDLE file = (HANDLE)_get_osfhandle(fd);
    if (file == INVALID_HANDLE_VALUE) {
        errno = EBADF;
        return MAP_FAILED;
    }

    HANDLE section = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, 0, NULL);
    if (section == NULL) {
        errno = ENOMEM;
        return MAP_FAILED;
    }

    /* MapViewOfFile takes the 64-bit offset as two 32-bit halves. */
    const uint64_t start = (uint64_t)offset;
    void *view = MapViewOfFile(section, FILE_MAP_READ,
                               (DWORD)(start >> 32),
                               (DWORD)(start & 0xFFFFFFFFu),
                               length);

    /* The section handle is released right away; the mapped view keeps the
     * section alive until it is unmapped. */
    CloseHandle(section);

    if (view == NULL) {
        errno = ENOMEM;
        return MAP_FAILED;
    }
    return view;
}

/* munmap() replacement: release a view obtained from mmap().
 * `length` is ignored because UnmapViewOfFile always unmaps the whole view.
 * Returns 0 on success and -1 on failure (errno is not set). */
static inline int munmap(void *addr, size_t length)
{
    (void)length;
    if (!UnmapViewOfFile(addr)) {
        return -1;
    }
    return 0;
}

/* posix_madvise() replacement: paging advice is purely a hint, so the request
 * is accepted and discarded. Always returns 0. */
static inline int posix_madvise(void *addr, size_t length, int advice)
{
    (void)addr;
    (void)length;
    (void)advice;
    return 0;
}

/* madvise() replacement: same contract as posix_madvise() above, i.e. the
 * advice is ignored and 0 is returned. */
static inline int madvise(void *addr, size_t length, int advice)
{
    (void)addr;
    (void)length;
    (void)advice;
    return 0;
}

/* sysconf() replacement backed by GetSystemInfo().
 *
 * Supported names:
 *   _SC_NPROCESSORS_ONLN -> dwNumberOfProcessors
 *   _SC_PAGESIZE         -> dwPageSize
 * Any other name returns -1 with errno = EINVAL. */
static inline long sysconf(int name)
{
    SYSTEM_INFO info;
    long value = -1;

    GetSystemInfo(&info);
    if (name == _SC_NPROCESSORS_ONLN) {
        value = (long)info.dwNumberOfProcessors;
    } else if (name == _SC_PAGESIZE) {
        value = (long)info.dwPageSize;
    } else {
        errno = EINVAL;
    }
    return value;
}

/* flock() replacement built on LockFileEx/UnlockFileEx over the whole file.
 *
 * op is a combination of the LOCK_* bits:
 *   LOCK_UN            release the lock (takes precedence over the other bits)
 *   LOCK_EX            exclusive lock; without it a shared lock is taken
 *   LOCK_NB            fail immediately instead of blocking
 *
 * Returns 0 on success. On failure returns -1 with errno set:
 *   EBADF        fd has no OS handle
 *   EWOULDBLOCK  the lock is held elsewhere (ERROR_LOCK_VIOLATION)
 *   EACCES       any other lock/unlock failure
 *
 * Releasing a file that is not locked is reported as success, as POSIX flock()
 * does, rather than surfacing ERROR_NOT_LOCKED as an error. */
static inline int flock(int fd, int op)
{
    HANDLE h = (HANDLE)_get_osfhandle(fd);
    if (h == INVALID_HANDLE_VALUE) {
        errno = EBADF;
        return -1;
    }

    /* Zero offset + MAXDWORD/MAXDWORD length covers the entire file. */
    OVERLAPPED ov;
    memset(&ov, 0, sizeof(ov));

    if (op & LOCK_UN) {
        if (UnlockFileEx(h, 0, MAXDWORD, MAXDWORD, &ov)) {
            return 0;
        }
        if (GetLastError() == ERROR_NOT_LOCKED) {
            return 0; /* nothing to release */
        }
        errno = EACCES;
        return -1;
    }

    DWORD lock_flags = 0;
    if (op & LOCK_EX) {
        lock_flags |= LOCKFILE_EXCLUSIVE_LOCK;
    }
    if (op & LOCK_NB) {
        lock_flags |= LOCKFILE_FAIL_IMMEDIATELY;
    }
    if (!LockFileEx(h, lock_flags, 0, MAXDWORD, MAXDWORD, &ov)) {
        errno = (GetLastError() == ERROR_LOCK_VIOLATION) ? EWOULDBLOCK : EACCES;
        return -1;
    }
    return 0;
}

/* fcntl() replacement that implements F_SETFD only.
 *
 * F_SETFD: the int argument is read; FD_CLOEXEC set clears the handle's
 * inherit flag, FD_CLOEXEC clear sets it. This matters because descriptors
 * created by the CRT are inheritable unless they were opened with
 * _O_NOINHERIT, so ignoring the request would leak the handle into children
 * started with handle inheritance enabled.
 * NOTE(review): this changes the OS handle flag only; whether the CRT also
 * forwards the descriptor to _spawn*() children is not affected here.
 *
 * Any other command is accepted and ignored (returns 0), as before.
 *
 * Returns 0 on success, or -1 with errno = EBADF when fd has no OS handle or
 * the inherit flag cannot be changed. */
static inline int fcntl(int fd, int cmd, ...)
{
    if (cmd != F_SETFD) {
        (void)fd;
        return 0;
    }

    va_list ap;
    va_start(ap, cmd);
    int fd_flags = va_arg(ap, int);
    va_end(ap);

    HANDLE h = (HANDLE)_get_osfhandle(fd);
    if (h == INVALID_HANDLE_VALUE) {
        errno = EBADF;
        return -1;
    }

    DWORD inherit = (fd_flags & FD_CLOEXEC) ? 0 : HANDLE_FLAG_INHERIT;
    if (!SetHandleInformation(h, HANDLE_FLAG_INHERIT, inherit)) {
        errno = EBADF;
        return -1;
    }
    return 0;
}
Comment on lines +125 to +129

/* pread() replacement: read up to `count` bytes from `fd` at absolute `offset`.
 *
 * Returns the number of bytes read (possibly fewer than requested), 0 at end
 * of file, or -1 with errno set:
 *   EINVAL  offset is negative
 *   EBADF   fd has no OS handle
 *   EIO     ReadFile failed for any reason other than end of file
 *
 * Notes:
 *  - ReadFile takes a 32-bit length, so one call reads at most 0x7FFFFFFF
 *    bytes; larger requests come back as a short read instead of being
 *    truncated modulo 2^32.
 *  - On a synchronous handle ReadFile advances the file pointer even when an
 *    OVERLAPPED offset is supplied. pread() must leave the position alone, so
 *    it is saved before the read and restored afterwards. This is best effort:
 *    if the position cannot be queried the read still proceeds.
 *    NOTE(review): save/restore is not atomic with respect to other threads
 *    that use the same descriptor's file position. */
static inline long long ds4_pread(int fd, void *buf, size_t count, long long offset)
{
    if (offset < 0) {
        errno = EINVAL;
        return -1;
    }

    HANDLE h = (HANDLE)_get_osfhandle(fd);
    if (h == INVALID_HANDLE_VALUE) {
        errno = EBADF;
        return -1;
    }

    DWORD want = (count > (size_t)0x7FFFFFFFu) ? (DWORD)0x7FFFFFFFu : (DWORD)count;

    LARGE_INTEGER zero;
    LARGE_INTEGER saved_pos;
    zero.QuadPart = 0;
    saved_pos.QuadPart = 0;
    BOOL have_pos = SetFilePointerEx(h, zero, &saved_pos, FILE_CURRENT);

    OVERLAPPED ov;
    memset(&ov, 0, sizeof(ov));
    ov.Offset = (DWORD)((uint64_t)offset & 0xFFFFFFFFu);
    ov.OffsetHigh = (DWORD)((uint64_t)offset >> 32);

    DWORD got = 0;
    BOOL ok = ReadFile(h, buf, want, &got, &ov);
    DWORD err = ok ? ERROR_SUCCESS : GetLastError();

    if (have_pos) {
        SetFilePointerEx(h, saved_pos, NULL, FILE_BEGIN);
    }

    if (!ok) {
        if (err == ERROR_HANDLE_EOF) {
            return 0;
        }
        errno = EIO;
        return -1;
    }
    return (long long)got;
}
Comment on lines +131 to +144
#define pread(fd, buf, count, offset) ds4_pread((fd), (buf), (size_t)(count), (long long)(offset))

/* ftruncate is already provided by MinGW <unistd.h>. */

/* dprintf() replacement: format like printf() and write the result to `fd`
 * with _write().
 *
 * Short messages are formatted into a stack buffer. When the formatted text
 * does not fit, a heap buffer of the exact size is used instead, so output is
 * never truncated and the string terminator is never written to the file.
 *
 * Returns the value of _write() (bytes written, or -1), or -1 if formatting
 * fails or the heap buffer cannot be allocated (errno = ENOMEM). */
static inline int dprintf(int fd, const char *fmt, ...)
{
    char stack_buf[512];
    char *out = stack_buf;
    va_list ap;

    va_start(ap, fmt);
    int len = vsnprintf(stack_buf, sizeof(stack_buf), fmt, ap);
    va_end(ap);
    if (len < 0) {
        return -1;
    }

    if ((size_t)len >= sizeof(stack_buf)) {
        /* vsnprintf reported the full length; format again into a buffer
         * that can hold all of it plus the terminator. */
        out = malloc((size_t)len + 1);
        if (out == NULL) {
            errno = ENOMEM;
            return -1;
        }
        va_start(ap, fmt);
        int again = vsnprintf(out, (size_t)len + 1, fmt, ap);
        va_end(ap);
        if (again < 0 || again > len) {
            free(out);
            return -1;
        }
        len = again;
    }

    int written = _write(fd, out, (unsigned)len);
    if (out != stack_buf) {
        int saved_errno = errno; /* keep _write's errno across free() */
        free(out);
        errno = saved_errno;
    }
    return written;
}
Comment on lines +149 to +158

/* ---- fmemopen (temp-file backed, fixed buffer) --------------------------- */
/* Bookkeeping for one stream returned by fmemopen():
 *   fp        the temp-file stream handed to the caller (NULL = slot is free)
 *   buf, cap  the caller's buffer and its size in bytes
 *   writeback non-zero when the file contents are copied into buf on close */
typedef struct { FILE *fp; void *buf; size_t cap; int writeback; } ds4_memstream;
/* Number of slots in the table of tracked streams. */
#define DS4_MEMSTREAM_MAX 16
/* Table of tracked streams, searched linearly by FILE pointer.
 * NOTE(review): these objects are `static` in a header, so every translation
 * unit that includes it gets its own table; a stream opened in one source file
 * and closed in another would not be found. */
static ds4_memstream ds4_ms_tab[DS4_MEMSTREAM_MAX];
/* Lock taken around every access to ds4_ms_tab. */
static CRITICAL_SECTION ds4_ms_cs;
/* Initialisation flag for ds4_ms_cs; checked by ds4_ms_ensure() and by
 * ds4_win_fclose() before the lock is used. */
static volatile LONG ds4_ms_init = 0;

/* Initialise ds4_ms_cs exactly once, safely under concurrent first use.
 *
 * ds4_ms_init becomes non-zero only AFTER InitializeCriticalSection() has
 * returned, so any thread that observes a non-zero flag may use the lock.
 * Threads that arrive while another thread is still initialising wait on a
 * small private spin guard instead of racing ahead with an uninitialised
 * critical section. */
static inline void ds4_ms_ensure(void)
{
    static volatile LONG init_guard = 0;

    /* Fast path: an interlocked read doubles as a memory barrier. */
    if (InterlockedCompareExchange(&ds4_ms_init, 0, 0) != 0) {
        return;
    }

    /* Serialise would-be initialisers; yield while someone else holds the guard. */
    while (InterlockedCompareExchange(&init_guard, 1, 0) != 0) {
        Sleep(0);
    }
    if (ds4_ms_init == 0) {
        InitializeCriticalSection(&ds4_ms_cs);
        InterlockedExchange(&ds4_ms_init, 1); /* publish only once ready */
    }
    InterlockedExchange(&init_guard, 0);
}
Comment on lines +167 to +171

/* Create an anonymous scratch file in the user's temp directory and open it
 * for binary read/write.
 *
 * The "T" and "D" mode letters are Microsoft CRT extensions: T marks the file
 * as temporary and D deletes it when the last stream on it is closed.
 *
 * Returns the stream, or NULL if the temp directory cannot be determined, a
 * unique name cannot be created, or the file cannot be opened. On an open
 * failure the placeholder file created by GetTempFileNameA is removed so it
 * does not accumulate in the temp directory. */
static inline FILE *ds4_tmpfile(void)
{
    char dir[MAX_PATH];
    char path[MAX_PATH];

    /* A result >= the buffer size means the path did not fit and dir is unusable. */
    DWORD dir_len = GetTempPathA((DWORD)sizeof(dir), dir);
    if (dir_len == 0 || dir_len >= (DWORD)sizeof(dir)) {
        return NULL;
    }

    /* uUnique == 0 makes the call create an empty file with a unique name. */
    if (GetTempFileNameA(dir, "ds4", 0, path) == 0) {
        return NULL;
    }

    FILE *fp = fopen(path, "wb+TD");
    if (fp == NULL) {
        int saved_errno = errno;
        DeleteFileA(path);
        errno = saved_errno;
    }
    return fp;
}
Comment on lines +173 to +180

/* fmemopen() replacement backed by a delete-on-close temp file.
 *
 * buf/size  caller-owned fixed buffer and its size in bytes
 * mode      fopen-style mode. A mode containing 'w', 'a' or '+' makes the
 *           stream a write stream: when it is closed through this header's
 *           fclose, up to `size` bytes of the file are copied back into buf.
 *           A mode containing 'r' (or any non-write mode) preloads the file
 *           with the buffer contents, so "r+" streams start with the existing
 *           data instead of an empty file.
 *
 * Returns the stream, or NULL when the temp file cannot be created, the
 * preload write fails, or (errno = EMFILE) the stream needs write-back but all
 * DS4_MEMSTREAM_MAX tracking slots are in use. Failing in that last case
 * avoids returning a stream whose contents would silently never reach buf.
 *
 * Limitation: writes past `size` are not rejected; the excess is simply not
 * copied back. */
static inline FILE *fmemopen(void *buf, size_t size, const char *mode)
{
    ds4_ms_ensure();

    int writing = (mode && (strchr(mode, 'w') || strchr(mode, 'a') || strchr(mode, '+')));
    int preload = !writing || strchr(mode, 'r') != NULL;

    FILE *fp = ds4_tmpfile();
    if (!fp) {
        return NULL;
    }

    if (preload && buf && size) {
        if (fwrite(buf, 1, size, fp) != size) {
            fclose(fp);
            return NULL;
        }
        rewind(fp);
    }

    int registered = 0;
    EnterCriticalSection(&ds4_ms_cs);
    for (int i = 0; i < DS4_MEMSTREAM_MAX; i++) {
        if (ds4_ms_tab[i].fp == NULL) {
            ds4_ms_tab[i].fp = fp;
            ds4_ms_tab[i].buf = buf;
            ds4_ms_tab[i].cap = size;
            ds4_ms_tab[i].writeback = writing ? 1 : 0;
            registered = 1;
            break;
        }
    }
    LeaveCriticalSection(&ds4_ms_cs);

    /* A read-only stream works untracked; a write stream does not, because
     * nothing would copy its contents back on close. */
    if (!registered && writing) {
        fclose(fp);
        errno = EMFILE;
        return NULL;
    }
    return fp;
}

/* fclose() wrapper that is aware of streams created by fmemopen().
 *
 * If `fp` is found in the tracking table and was opened for writing, the temp
 * file is flushed, rewound and up to `cap` bytes are read back into the
 * caller's buffer; the slot is then released. In every case the stream is
 * finally closed with the real fclose(), whose result is returned.
 *
 * The table is only consulted once ds4_ms_init is non-zero, i.e. after
 * fmemopen() has been used at least once. */
static inline int ds4_win_fclose(FILE *fp)
{
    if (fp != NULL && ds4_ms_init != 0) {
        EnterCriticalSection(&ds4_ms_cs);

        /* Locate the first slot tracking this stream, if any. */
        ds4_memstream *slot = NULL;
        for (int i = 0; i < DS4_MEMSTREAM_MAX && slot == NULL; i++) {
            if (ds4_ms_tab[i].fp == fp) {
                slot = &ds4_ms_tab[i];
            }
        }

        if (slot != NULL) {
            if (slot->writeback && slot->buf && slot->cap) {
                fflush(fp);
                rewind(fp);
                fread(slot->buf, 1, slot->cap, fp); /* copy back */
            }
            /* Mark the slot free for the next fmemopen(). */
            slot->fp = NULL;
            slot->buf = NULL;
            slot->cap = 0;
            slot->writeback = 0;
        }

        LeaveCriticalSection(&ds4_ms_cs);
    }
    return fclose(fp); /* real fclose — macro defined only after this header */
}

#endif /* _WIN32 */

/* Redirect fclose AFTER all helpers above so ds4_win_fclose's own call hits the
* real fclose. Source files including this header get the memory-stream-aware one. */
#ifdef _WIN32
#define fclose(fp) ds4_win_fclose(fp)
#endif

#endif /* DS4_WIN_H */
Loading