35 changes: 35 additions & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1,35 @@
# Use the same base image as before (Rust on Debian Bookworm)
FROM mcr.microsoft.com/devcontainers/rust:1-1-bookworm

# Avoid interactive prompts
ENV DEBIAN_FRONTEND=noninteractive

# Install protoc (protobuf compiler) and other dev tools required for building gRPC/tonic/prost
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        protobuf-compiler \
        git \
        htop \
        make \
        libprotobuf-dev \
        pkg-config \
        build-essential \
        clang \
        cmake \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv (fast Python package manager)
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
    && echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> /home/vscode/.bashrc

# Install protoc-gen-grpc-web if needed (optional) - commented out for now
# RUN curl -L -o /usr/local/bin/protoc-gen-grpc-web \
# https://github.com/grpc/grpc-web/releases/download/1.4.0/protoc-gen-grpc-web-1.4.0-linux-x86_64 \
# && chmod +x /usr/local/bin/protoc-gen-grpc-web

# Ensure the vscode user can use installed tools (paths are system-wide)

# Switch back to default non-root user set by the base image (if any)
USER vscode
13 changes: 6 additions & 7 deletions .devcontainer/devcontainer.json
@@ -1,6 +1,8 @@
{
  "name": "GaleMind Server Dev Environment",
  "image": "mcr.microsoft.com/devcontainers/rust:1-1-bookworm",
  "build": {
    "dockerfile": "Dockerfile"
  },
  "features": {
    "ghcr.io/devcontainers/features/python:1": {
      "version": "3.12"
@@ -21,7 +23,8 @@
"ms-python.pylint",
"ms-python.black-formatter",
"vadimcn.vscode-lldb",
"serayuzgur.crates"
"serayuzgur.crates",
"zxh404.vscode-proto3"
],
"settings": {
"rust-analyzer.cargo.allFeatures": true,
@@ -40,10 +43,6 @@
"onAutoForward": "notify"
}
},
"postCreateCommand": "cargo build",
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/galemind-server",
"mounts": [
"source=${localWorkspaceFolder}/models,target=/workspaces/galemind-server/models,type=bind,consistency=cached"
]
"workspaceFolder": "/workspaces/galemind-server"
}
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
/target/
3 changes: 3 additions & 0 deletions Makefile
@@ -11,6 +11,9 @@ format:
	cargo fmt

run:
	@if [ -f .env ]; then \
		set -a && . ./.env && set +a; \
	fi && \
	cargo run -p galemind start
docker-build:
	$(eval TIMESTAMP := $(shell date +%Y%m%d%H%M))
207 changes: 206 additions & 1 deletion README.md
@@ -15,7 +15,19 @@ git clone <repository-url>
cd galemind-server
```

2. Install Rust dependencies:
2. Install dependencies:
Make sure `libssl-dev` is installed! The Rust `openssl` crate depends on it.
For Debian derivatives:
```bash
sudo apt install libssl-dev
```

Make sure `protobuf-compiler` is installed! The `grpc_server` crate depends on it.
For Debian derivatives:
```bash
sudo apt install protobuf-compiler
```

Then build the project:

```bash
cargo build
```
@@ -90,6 +102,199 @@ cargo run -p galemind start \
- **REST API**: Available at `http://localhost:8080` (default)
- **gRPC API**: Available at `localhost:50051` (default)

## API Usage

The REST server supports both the native Galemind protocol and an OpenAI-compatible API, selected via the `X-Protocol-Inference` header.

### OpenAI Protocol

Set the `X-Protocol-Inference: openai` header to interact with the OpenAI-compatible endpoints:

#### Chat Completions
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "X-Protocol-Inference: openai" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [
      {"role": "user", "content": "Hello, how are you?"}
    ],
    "temperature": 0.7,
    "max_tokens": 150
  }'
```

#### List Models
```bash
curl -X GET http://localhost:8080/v1/models \
-H "X-Protocol-Inference: openai"
```

#### Model Ready Check
```bash
curl -X GET http://localhost:8080/v1/models/gpt-3.5-turbo/ready \
-H "X-Protocol-Inference: openai"
```

### Galemind Protocol

Set the `X-Protocol-Inference: galemind` header (or omit it, since Galemind is the default) to use the native protocol:

#### Inference Request
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "X-Protocol-Inference: galemind" \
  -d '{
    "id": "test-request-1",
    "inputs": [
      {
        "name": "input_text",
        "shape": [1],
        "datatype": "string",
        "data": ["Hello, how are you?"]
      }
    ]
  }'
```

#### List Models
```bash
curl -X GET http://localhost:8080/v1/models \
-H "X-Protocol-Inference: galemind"
```

#### Model Ready Check
```bash
curl -X GET http://localhost:8080/v1/models/my-model/ready \
-H "X-Protocol-Inference: galemind"
```

## gRPC Unified Interface

The gRPC server now supports an enhanced unified interface that provides:

- **Protocol Selection**: Choose between Galemind and OpenAI protocols
- **Multiple Content Types**: Text, Binary, and Base64 content support
- **Streaming Support**: Advanced streaming with chunk management and end-of-stream detection
- **Backward Compatibility**: Full compatibility with existing ModelInfer methods

### New gRPC Service Methods

#### UnifiedInfer (Single Request/Response)
```protobuf
rpc UnifiedInfer(UnifiedInferRequest) returns (UnifiedInferResponse)
```

#### UnifiedInferStream (Bidirectional Streaming)
```protobuf
rpc UnifiedInferStream(stream UnifiedInferRequest) returns (stream UnifiedInferResponse)
```

### Message Structure

#### UnifiedInferRequest
```protobuf
message UnifiedInferRequest {
  InferenceProtocol protocol = 1;                  // PROTOCOL_GALEMIND or PROTOCOL_OPENAI
  optional ModelInferRequest legacy_request = 2;   // For backward compatibility
  MessageContent content = 3;                      // Enhanced content with type support
  optional StreamMetadata stream_metadata = 4;     // Streaming metadata
  string model_name = 5;
  string model_version = 6;
  string request_id = 7;
  map<string, InferParameter> parameters = 8;
  map<string, string> metadata = 9;
}
```
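
None of the examples below set `parameters` or `metadata`. A minimal sketch of a request that populates both is shown here; the inner fields of `InferParameter` (e.g. `string_param`) are not defined in this README and are an assumption:

```protobuf
UnifiedInferRequest {
  protocol: PROTOCOL_OPENAI
  content: {
    content_type: CONTENT_TYPE_TEXT
    text_content: "Summarize this paragraph."
  }
  model_name: "gpt-3.5-turbo"
  request_id: "req_params_1"
  parameters: {
    key: "temperature"
    value: { string_param: "0.7" }   // InferParameter field name assumed
  }
  metadata: {
    key: "caller"
    value: "docs-example"
  }
}
```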

#### Content Types
- **CONTENT_TYPE_TEXT**: Plain text content
- **CONTENT_TYPE_BINARY**: Raw binary data
- **CONTENT_TYPE_BASE64**: Base64-encoded content

#### Streaming Features
- **Stream ID**: Unique identifier for stream sessions
- **Chunk Sequencing**: Ordered chunk processing with sequence numbers
- **End-of-Stream Detection**: Automatic stream completion handling
- **Stream Reconstruction**: Automatic combining of chunked content

### Backward Compatibility

The enhanced interface maintains full backward compatibility:

1. **Legacy Support**: Original `ModelInfer` and `ModelInferAsync` methods continue to work
2. **Legacy Request Field**: Use the `legacy_request` field in `UnifiedInferRequest` to wrap an existing request (see the sketch after this list)
3. **Protocol Fallback**: Defaults to Galemind protocol when not specified
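
A minimal sketch of item 2, wrapping an existing request through `legacy_request`. The README does not spell out the fields of `ModelInferRequest`, so the ones inside the wrapper are illustrative only:

```protobuf
UnifiedInferRequest {
  protocol: PROTOCOL_GALEMIND        // optional; Galemind is the default
  legacy_request: {
    // existing ModelInferRequest payload, unchanged (field names illustrative)
    model_name: "my-model"
    id: "legacy_req_1"
  }
  model_name: "my-model"
  request_id: "req_legacy_1"
}
```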

### Example Usage Patterns

#### Single Request with OpenAI Protocol
```protobuf
UnifiedInferRequest {
  protocol: PROTOCOL_OPENAI
  content: {
    content_type: CONTENT_TYPE_TEXT
    text_content: "Hello, how are you?"
  }
  model_name: "gpt-3.5-turbo"
  request_id: "req_123"
}
```

#### Streaming with Chunks
```protobuf
// Chunk 1
UnifiedInferRequest {
  protocol: PROTOCOL_GALEMIND
  content: {
    content_type: CONTENT_TYPE_TEXT
    text_content: "First part of message"
  }
  stream_metadata: {
    stream_id: "stream_456"
    chunk_sequence: 1
    is_streaming: true
    end_of_stream: false
    total_chunks: 3
  }
  model_name: "my-model"
  request_id: "req_456"
}

// Final Chunk
UnifiedInferRequest {
  protocol: PROTOCOL_GALEMIND
  content: {
    content_type: CONTENT_TYPE_TEXT
    text_content: "Final part of message"
  }
  stream_metadata: {
    stream_id: "stream_456"
    chunk_sequence: 3
    is_streaming: true
    end_of_stream: true
    total_chunks: 3
  }
  model_name: "my-model"
  request_id: "req_456"
}
```

#### Binary Content Processing
```protobuf
UnifiedInferRequest {
  protocol: PROTOCOL_GALEMIND
  content: {
    content_type: CONTENT_TYPE_BINARY
    binary_content: [raw_bytes_here]
  }
  model_name: "image-processor"
  request_id: "req_789"
}
```
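
#### Base64 Content Processing

The README does not include a base64 example. The sketch below assumes the `MessageContent` field is named `base64_content`; the actual field name may differ in the proto definition.

```protobuf
UnifiedInferRequest {
  protocol: PROTOCOL_GALEMIND
  content: {
    content_type: CONTENT_TYPE_BASE64
    base64_content: "SGVsbG8sIGhvdyBhcmUgeW91Pw=="   // field name assumed; encodes "Hello, how are you?"
  }
  model_name: "my-model"
  request_id: "req_790"
}
```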

### Available Make Commands

| Command | Description |
2 changes: 1 addition & 1 deletion src/foundation/src/api/mlflow_client.rs
@@ -1,4 +1,4 @@
use anyhow::{Result, anyhow};
use anyhow::{anyhow, Result};
use async_trait::async_trait;
use reqwest::Client;
use serde::{Deserialize, Serialize};
4 changes: 2 additions & 2 deletions src/foundation/src/model/model_discovery_service.rs
@@ -279,7 +279,7 @@ mod tests {

#[tokio::test]
async fn test_discover_models_with_mlflow_source() {
let service = ModelDiscoveryService::new(10);
let _service = ModelDiscoveryService::new(10);
let sources = vec![ModelSource::MLFlow {
base_url: "http://localhost:5000".to_string(),
api_token: None,
@@ -294,7 +294,7 @@

#[tokio::test]
async fn test_discover_all_models_from_mlflow() {
let service = ModelDiscoveryService::new(10);
let _service = ModelDiscoveryService::new(10);
let sources = vec![ModelSource::MLFlow {
base_url: "http://localhost:5000".to_string(),
api_token: Some("token123".to_string()),