vllm-project · sjmonson · May 15, 2026 · May 11, 2026 · May 14, 2026 · May 15, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -95,15 +95,15 @@ Running benchmarks requires an active model server. Here are some example comman
 uv run guidellm benchmark run \
   --target http://localhost:8000 \
   --profile sweep \
-  --data "prompt_tokens=256,output_tokens=128" \
+  --data "kind=synthetic_text,prompt_tokens=256,output_tokens=128" \
   --min-requests 1000
 
 # Production-like benchmark with specific dataset
 uv run guidellm benchmark run \
   --target http://localhost:8000 \
   --profile constant \
   --rate 10,20 \
-  --data "openai/gsm8k" \
+  --data "kind=huggingface,data=openai/gsm8k" \
   --max-seconds 300 \
   --outputs "benchmark.json,report.csv"
 ```

diff --git a/README.md b/README.md
@@ -94,7 +94,7 @@ podman run \
   -e GUIDELLM_TARGET=http://localhost:8000 \
   -e GUIDELLM_PROFILE=sweep \
   -e GUIDELLM_MAX_SECONDS=30 \
-  -e GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
+  -e GUIDELLM_DATA="kind=synthetic_text,prompt_tokens=256,output_tokens=128" \
   ghcr.io/vllm-project/guidellm:latest
 ```
 
@@ -117,7 +117,7 @@ guidellm benchmark \
   --target "http://localhost:8000" \
   --profile sweep \
   --max-seconds 30 \
-  --data "prompt_tokens=256,output_tokens=128"
+  --data "kind=synthetic_text,prompt_tokens=256,output_tokens=128"
 ```
 
 You will see progress updates and per-benchmark summaries during the run, as given below:
@@ -168,7 +168,7 @@ guidellm benchmark \
   --profile constant \
   --rate 10 \
   --max-seconds 20 \
-  --data "prompt_tokens=128,output_tokens=256"
+  --data "kind=synthetic_text,prompt_tokens=128,output_tokens=256"
 ```
 
 **Key parameters:**
@@ -184,15 +184,13 @@ GuideLLM supports HuggingFace datasets, local files, and synthetic data. This ex
 ```bash
 guidellm benchmark run \
   --target http://localhost:8000 \
-  --data "abisee/cnn_dailymail" \
-  --data-args '{"name": "3.0.0"}' \
+  --data '{"kind": "huggingface", "data": "abisee/cnn_dailymail", "load_kwargs": {"name": "3.0.0"}}' \
   --data-column-mapper '{"column_mappings": {"text_column": "article"}}'
 ```
 
 **Key parameters:**
 
-- `--data`: Data source specification - accepts HuggingFace dataset IDs (prefix with `hf:`), local file paths (`.json`, `.csv`, `.jsonl`, `.txt`), or synthetic data configs (JSON object or `key=value` pairs like `prompt_tokens=256,output_tokens=128`)
-- `--data-args`: Arguments for loading the dataset. See [`datasets.load_dataset`](https://huggingface.co/docs/datasets/v4.5.0/en/package_reference/loading_methods#datasets.load_dataset) for valid options.
+- `--data`: Data source specification, given either as comma-separated `key=value` pairs or as a JSON object, with `kind` selecting the source type: `kind=synthetic_text,prompt_tokens=...,output_tokens=...` for synthetic data; `kind=huggingface,data=DATASET_ID` for HuggingFace datasets (optionally with `load_kwargs` for dataset loading arguments, as in the JSON example above); `kind=json_file,path=...`, `kind=csv_file,path=...`, or `kind=text_file,path=...` for local files; and `kind=trace_synthetic,path=...` for trace replay files. Can be specified multiple times to use multiple data sources.
 - `--data-column-mapper`: JSON object of arguments for dataset creation - commonly used to specify column mappings like `text_column`, `output_tokens_count_column`, or HuggingFace dataset parameters
 - `--data-samples`: Number of samples to use from the dataset - use `-1` (default) for all samples with dynamic generation, or specify a positive integer to limit sample count
 - `--processor`: Tokenizer or processor name used for generating synthetic data - if not provided and required for the dataset, automatically loads from the model; accepts HuggingFace model IDs or local paths
@@ -205,7 +203,7 @@ You can benchmark chat completions, text completions, or other supported request
 guidellm benchmark \
   --target http://localhost:8000 \
   --request-type chat_completions \
-  --data path/to/data.json
+  --data "kind=json_file,path=path/to/data.json"
 ```
 
 **Key parameters:**
@@ -236,7 +234,7 @@ guidellm benchmark \
   --warmup 0.1 \
   --cooldown 0.1 \
   --max-errors 5 \
-  --data "prompt_tokens=256,output_tokens=128" \
+  --data "kind=synthetic_text,prompt_tokens=256,output_tokens=128" \
   --detect-saturation
 ```
 

diff --git a/docs/examples/practice_on_vllm_simulator.md b/docs/examples/practice_on_vllm_simulator.md
@@ -99,7 +99,7 @@ guidellm benchmark \
 --profile sweep \
 --max-seconds 10 \
 --max-requests 10 \
---data "prompt_tokens=128,output_tokens=56"
+--data "kind=synthetic_text,prompt_tokens=128,output_tokens=56"
 ```
 
 ______________________________________________________________________

diff --git a/docs/getting-started/benchmark.md b/docs/getting-started/benchmark.md
@@ -32,7 +32,7 @@ To run a benchmark against your local vLLM server with default settings:
 ```bash
 guidellm benchmark \
   --target "http://localhost:8000" \
-  --data "prompt_tokens=256,output_tokens=128" \
+  --data "kind=synthetic_text,prompt_tokens=256,output_tokens=128" \
   --max-seconds 60
 ```
 
@@ -55,19 +55,19 @@ GuideLLM offers a wide range of configuration options to customize your benchmar
 
 ### Key Parameters
 
-| Parameter        | Description                                    | Example                                        |
-| ---------------- | ---------------------------------------------- | ---------------------------------------------- |
-| `--target`       | URL of the OpenAI-compatible server            | `--target "http://localhost:8000"`             |
-| `--model`        | Model name to benchmark                        | `--model "Meta-Llama-3.1-8B-Instruct"`         |
-| `--data`         | Data configuration for benchmarking            | `--data "prompt_tokens=256,output_tokens=128"` |
-| `--profile`      | Type of benchmark profile to run               | `--profile sweep`                              |
-| `--rate`         | Request rate or number of benchmarks for sweep | `--rate 10`                                    |
-| `--random-seed`  | Random seed for reproducibility                | `--random-seed 42`                             |
-| `--max-seconds`  | Duration for each benchmark in seconds         | `--max-seconds 30`                             |
-| `--max-requests` | Maximum number of requests for each benchmark  | `--max-requests 1000`                          |
-| `--data-samples` | Maximum number of dataset rows to load         | `--data-samples 1000`                          |
-| `--output-dir`   | Directory path to save output files            | `--output-dir results/`                        |
-| `--outputs`      | Output formats to generate                     | `--outputs json csv html`                      |
+| Parameter        | Description                                    | Example                                                            |
+| ---------------- | ---------------------------------------------- | ------------------------------------------------------------------ |
+| `--target`       | URL of the OpenAI-compatible server            | `--target "http://localhost:8000"`                                 |
+| `--model`        | Model name to benchmark                        | `--model "Meta-Llama-3.1-8B-Instruct"`                             |
+| `--data`         | Data configuration for benchmarking            | `--data "kind=synthetic_text,prompt_tokens=256,output_tokens=128"` |
+| `--profile`      | Type of benchmark profile to run               | `--profile sweep`                                                  |
+| `--rate`         | Request rate or number of benchmarks for sweep | `--rate 10`                                                        |
+| `--random-seed`  | Random seed for reproducibility                | `--random-seed 42`                                                 |
+| `--max-seconds`  | Duration for each benchmark in seconds         | `--max-seconds 30`                                                 |
+| `--max-requests` | Maximum number of requests for each benchmark  | `--max-requests 1000`                                              |
+| `--data-samples` | Maximum number of dataset rows to load         | `--data-samples 1000`                                              |
+| `--output-dir`   | Directory path to save output files            | `--output-dir results/`                                            |
+| `--outputs`      | Output formats to generate                     | `--outputs json csv html`                                          |
 
 ### Random Seed (`--random-seed`)
 
@@ -171,17 +171,17 @@ guidellm benchmark --profile sweep
 
 ### Synthetic Data Options
 
-For synthetic data, parameters for random data generation are passed as key=value pairs to the `--data` parameter. Some key options include:
+For synthetic data, pass `kind=synthetic_text` followed by the desired generation parameters as comma-separated `key=value` pairs in the `--data` argument. Some key options include:
 
 - `prompt_tokens`: Average number of tokens for prompts
 - `output_tokens`: Average number of tokens for outputs
 
-For example, to generate 1000 samples with a prompt length of 100 tokens and an output length of 50 tokens:
+For example, to benchmark with a prompt length of 100 tokens and an output length of 50 tokens:
 
 ```bash
 guidellm benchmark \
   --target "http://localhost:8000" \
-  --data "prompt_tokens=100,output_tokens=50,samples=1000" \
+  --data "kind=synthetic_text,prompt_tokens=100,output_tokens=50" \
   --profile constant \
   --rate 5
 ```
@@ -204,8 +204,7 @@ Run with the `replay` profile:
 ```bash
 guidellm benchmark \
   --target "http://localhost:8000" \
-  --data path/to/trace.jsonl \
-  --data-args type_=trace_synthetic \
+  --data "kind=trace_synthetic,path=path/to/trace.jsonl" \
   --profile replay \
   --rate 1.0
 ```
@@ -214,7 +213,15 @@ The `--rate` parameter acts as a time scale for the intervals between trace even
 
 GuideLLM orders trace rows by timestamp before scheduling and payload generation, so each scheduled event uses the token lengths from the same sorted row. Use `--data-samples` to limit how many trace rows are loaded and replayed. `--max-requests` remains a runtime completion constraint; it does not truncate the trace dataset.
 
-If your trace uses different column names, map them with `timestamp_column`, `prompt_tokens_column`, and `output_tokens_column` in `--data-args`.
+If your trace uses different column names, include `timestamp_column`, `prompt_tokens_column`, and `output_tokens_column` directly in the `--data` argument:
+
+```bash
+guidellm benchmark \
+  --target "http://localhost:8000" \
+  --data "kind=trace_synthetic,path=replay.jsonl,timestamp_column=timestamp,prompt_tokens_column=input_length,output_tokens_column=output_length" \
+  --profile replay \
+  --rate 1.0
+```
 
 For very small prompts (roughly under 15 tokens, depending on the tokenizer), GuideLLM may not have enough room to include the full per-row unique prefix. Different rows can then produce similar or identical prompts, which reduces cache resistance in replay benchmarks.
 
@@ -225,7 +232,7 @@ While synthetic data is convenient for quick tests, you can benchmark with real-
 ```bash
 guidellm benchmark \
   --target "http://localhost:8000" \
-  --data "/path/to/your/dataset.json" \
+  --data "kind=json_file,path=/path/to/your/dataset.json" \
   --profile constant \
   --rate 5
 ```
@@ -235,7 +242,7 @@ You can also use datasets from HuggingFace:
 ```bash
 guidellm benchmark \
   --target "http://localhost:8000" \
-  --data garage-bAInd/Open-Platypus \
+  --data "kind=huggingface,data=garage-bAInd/Open-Platypus" \
   --profile constant \
   --rate 5
 ```

diff --git a/docs/guides/backends.md b/docs/guides/backends.md
@@ -81,7 +81,7 @@ guidellm benchmark \
   --target "https://api.openai.com/v1" \
   --backend-kwargs '{"api_key": "sk-..."}' \
   --model "gpt-3.5-turbo" \
-  --data "prompt_tokens=256,output_tokens=128"
+  --data "kind=synthetic_text,prompt_tokens=256,output_tokens=128"
 ```
 
 The API key is used to set the `Authorization: Bearer {api_key}` header in HTTP requests to the backend server.
@@ -101,7 +101,7 @@ The `extras` field accepts a `body` key whose values are merged directly into th
 guidellm benchmark \
   --target "http://localhost:8000/v1" \
   --model "meta-llama/Meta-Llama-3.1-8B-Instruct" \
-  --data "prompt_tokens=256,output_tokens=128" \
+  --data "kind=synthetic_text,prompt_tokens=256,output_tokens=128" \
   --backend-kwargs '{"extras": {"body": {"temperature": 0.6, "top_p": 0.95, "top_k": 20}}}'
 ```
 
@@ -121,7 +121,7 @@ The `--backend-kwargs` option accepts a JSON string that is passed as keyword ar
 guidellm benchmark \
   --target "http://localhost:8000/v1" \
   --model "meta-llama/Meta-Llama-3.1-8B-Instruct" \
-  --data "prompt_tokens=256,output_tokens=128" \
+  --data "kind=synthetic_text,prompt_tokens=256,output_tokens=128" \
   --backend-kwargs '{"api_key": "sk-...", "extras": {"body": {"temperature": 0.8, "top_p": 0.9}}}'
 ```