apache · adityamparikh · Mar 26, 2026 · Apr 6, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -8,7 +8,7 @@ Solr MCP Server is a Spring AI Model Context Protocol (MCP) server that enables
 
 - **Status:** Apache incubating project (v0.0.2-SNAPSHOT)
 - **Java:** 25+ (centralized in build.gradle.kts)
-- **Framework:** Spring Boot 3.5.8, Spring AI 1.1.2
+- **Framework:** Spring Boot 3.5.8, Spring AI 1.1.4
 - **License:** Apache 2.0
 
 ## Common Commands
@@ -44,7 +44,7 @@ PROFILES=http ./gradlew bootRun    # HTTP mode
 Four service classes expose MCP tools via `@McpTool` annotations:
 
 - **SearchService** (`search/`) - Full-text search with filtering, faceting, sorting, pagination
-- **IndexingService** (`indexing/`) - Document indexing supporting JSON, CSV, XML formats
+- **IndexingService** (`indexing/`) - Document indexing for JSON, CSV, and XML input, plus uploaded files whose text was already extracted by the chat client
 - **CollectionService** (`metadata/`) - List collections, get stats, health checks
 - **SchemaService** (`metadata/`) - Schema introspection
 
@@ -53,6 +53,7 @@ Four service classes expose MCP tools via `@McpTool` annotations:
 `indexing/documentcreator/` uses strategy pattern for format parsing:
 - `SolrDocumentCreator` - Common interface
 - `JsonDocumentCreator`, `CsvDocumentCreator`, `XmlDocumentCreator` - Format implementations
+- `FileDocumentCreator` - Builds one document from file text already extracted by the AI chat client (takes a filename as well, so it does not implement `SolrDocumentCreator`)
 - `IndexingDocumentCreator` - Orchestrator that delegates to format-specific creators
 - `FieldNameSanitizer` - Automatic field name validation for Solr compatibility
 

diff --git a/src/main/java/org/apache/solr/mcp/server/indexing/IndexingService.java b/src/main/java/org/apache/solr/mcp/server/indexing/IndexingService.java
@@ -356,6 +356,52 @@ public void indexXmlDocuments(@McpToolParam(description = "Solr collection to in
 		indexDocuments(collection, schemalessDoc);
 	}
 
+	/**
+	 * Indexes a document from file content into a Solr collection.
+	 *
+	 * <p>
+	 * This method accepts text content that has been extracted from a file by the
+	 * AI chat client. When a user uploads a file (PDF, Word, etc.) through their
+	 * chat client, the client extracts the text and passes it to this tool along
+	 * with the original filename.
+	 *
+	 * <p>
+	 * A single SolrInputDocument is created with the following fields:
+	 *
+	 * <ul>
+	 * <li><strong>id</strong> - Auto-generated UUID
+	 * <li><strong>content</strong> - The extracted text content
+	 * <li><strong>filename</strong> - The original filename
+	 * </ul>
+	 *
+	 * @param collection
+	 *            the name of the Solr collection to index into
+	 * @param content
+	 *            the text extracted from the file; must not be null or blank
+	 * @param filename
+	 *            original filename, e.g. "report.pdf"; must not be null or blank
+	 * @throws IOException
+	 *             if there are I/O errors during Solr communication
+	 * @throws SolrServerException
+	 *             if Solr server encounters errors during indexing
+	 * @see IndexingDocumentCreator#createSchemalessDocumentsFromFile(String,
+	 *      String)
+	 * @see #indexDocuments(String, List)
+	 */
+	@PreAuthorize("isAuthenticated()")
+	@McpTool(name = "index-file-document", description = "Index a document from file content into a Solr collection. "
+			+ "Use this when a user uploads a file (PDF, Word, Excel, etc.) and the text has "
+			+ "already been extracted by the chat client. Pass the extracted text as content "
+			+ "along with the original filename.")
+	public void indexFileDocument(@McpToolParam(description = "Solr collection to index into") String collection,
+			@McpToolParam(description = "Text content extracted from the file") String content,
+			@McpToolParam(description = "Original filename with extension (e.g. 'report.pdf')") String filename)
+			throws IOException, SolrServerException {
+		List<SolrInputDocument> documents = indexingDocumentCreator.createSchemalessDocumentsFromFile(content,
+				filename);
+		indexDocuments(collection, documents);
+	}
+
 	/**
 	 * Indexes a list of SolrInputDocument objects into a Solr collection using
 	 * batch processing.

diff --git a/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/FileDocumentCreator.java b/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/FileDocumentCreator.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.mcp.server.indexing.documentcreator;
+
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.UUID;
+import org.apache.solr.common.SolrInputDocument;
+import org.springframework.stereotype.Component;
+
+/**
+ * Creates a SolrInputDocument from text content extracted from a file.
+ *
+ * <p>
+ * This creator handles documents uploaded through AI chat clients, where the
+ * client has already extracted the text content from the original file (PDF,
+ * Word, etc.). It produces a single SolrInputDocument containing the text
+ * content and the original filename as metadata.
+ *
+ * <p>
+ * This class does not implement {@link SolrDocumentCreator} because it requires
+ * a filename parameter in addition to the content string.
+ *
+ * @see IndexingDocumentCreator#createSchemalessDocumentsFromFile(String,
+ *      String)
+ */
+@Component
+public class FileDocumentCreator {
+
+	private static final int MAX_INPUT_SIZE_BYTES = 10 * 1024 * 1024;
+
+	/**
+	 * Creates a SolrInputDocument from the provided text content and filename.
+	 *
+	 * @param content
+	 *            the extracted text; at most 10 MiB when encoded as UTF-8
+	 * @param filename
+	 *            the original filename (stored as metadata for search and
+	 *            filtering)
+	 * @return an unmodifiable single-element list holding the new document
+	 * @throws DocumentProcessingException
+	 *             if either argument is null or blank, or content exceeds 10 MiB
+	 */
+	public List<SolrInputDocument> create(String content, String filename) throws DocumentProcessingException {
+		if (content == null || content.isBlank()) {
+			throw new DocumentProcessingException("File content cannot be null or empty");
+		}
+		if (filename == null || filename.isBlank()) {
+			throw new DocumentProcessingException("Filename cannot be null or empty");
+		}
+		if (content.getBytes(StandardCharsets.UTF_8).length > MAX_INPUT_SIZE_BYTES) {
+			throw new DocumentProcessingException(
+					"Input too large: exceeds maximum size of " + MAX_INPUT_SIZE_BYTES + " bytes");
+		}
+
+		SolrInputDocument doc = new SolrInputDocument();
+		doc.addField("id", UUID.randomUUID().toString());
+		doc.addField("content", content);
+		doc.addField("filename", filename);
+		return List.of(doc);
+	}
+
+}
diff --git a/...ain/java/org/apache/solr/mcp/server/indexing/documentcreator/IndexingDocumentCreator.java b/...ain/java/org/apache/solr/mcp/server/indexing/documentcreator/IndexingDocumentCreator.java
@@ -62,11 +62,14 @@ public class IndexingDocumentCreator {
 
 	private final JsonDocumentCreator jsonDocumentCreator;
 
+	private final FileDocumentCreator fileDocumentCreator;
+
 	public IndexingDocumentCreator(XmlDocumentCreator xmlDocumentCreator, CsvDocumentCreator csvDocumentCreator,
-			JsonDocumentCreator jsonDocumentCreator) {
+			JsonDocumentCreator jsonDocumentCreator, FileDocumentCreator fileDocumentCreator) {
 		this.xmlDocumentCreator = xmlDocumentCreator;
 		this.csvDocumentCreator = csvDocumentCreator;
 		this.jsonDocumentCreator = jsonDocumentCreator;
+		this.fileDocumentCreator = fileDocumentCreator;
 	}
 
 	/**
@@ -134,4 +137,31 @@ public List<SolrInputDocument> createSchemalessDocumentsFromXml(String xml) thro
 
 		return xmlDocumentCreator.create(xml);
 	}
+
+	/**
+	 * Creates a SolrInputDocument from text content extracted from a file.
+	 *
+	 * <p>
+	 * Intended for chat-client uploads whose text is already extracted. Null or
+	 * blank arguments are rejected with an {@link IllegalArgumentException}.
+	 *
+	 * @param content
+	 *            the text content extracted from the file
+	 * @param filename
+	 *            the original filename (stored as metadata)
+	 * @return list of SolrInputDocument objects ready for indexing
+	 * @throws DocumentProcessingException
+	 *             if the content exceeds the creator's size limit
+	 * @see FileDocumentCreator
+	 */
+	public List<SolrInputDocument> createSchemalessDocumentsFromFile(String content, String filename)
+			throws DocumentProcessingException {
+		if (content == null || content.isBlank()) {
+			throw new IllegalArgumentException("File content cannot be null or empty");
+		}
+		if (filename == null || filename.isBlank()) {
+			throw new IllegalArgumentException("Filename cannot be null or empty");
+		}
+		return fileDocumentCreator.create(content, filename);
+	}
 }
diff --git a/src/test/java/org/apache/solr/mcp/server/indexing/FileIndexingTest.java b/src/test/java/org/apache/solr/mcp/server/indexing/FileIndexingTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.mcp.server.indexing;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.util.List;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.mcp.server.indexing.documentcreator.IndexingDocumentCreator;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.TestPropertySource;
+
+/**
+ * Integration test for file document indexing through
+ * {@link IndexingDocumentCreator}.
+ */
+@SpringBootTest
+@TestPropertySource(locations = "classpath:application.properties")
+class FileIndexingTest {
+
+	@Autowired
+	private IndexingDocumentCreator indexingDocumentCreator;
+
+	@Test
+	void testCreateSchemalessDocumentsFromFile() throws Exception {
+		String content = "This is the text extracted from a PDF about Apache Solr full-text search.";
+
+		List<SolrInputDocument> documents = indexingDocumentCreator.createSchemalessDocumentsFromFile(content,
+				"test-document.pdf");
+
+		assertThat(documents).hasSize(1);
+
+		SolrInputDocument doc = documents.getFirst();
+		assertThat(doc.getFieldValue("content")).isEqualTo(content);
+		assertThat(doc.getFieldValue("filename")).isEqualTo("test-document.pdf");
+		assertThat(doc.getFieldValue("id")).isNotNull();
+	}
+
+	@Test
+	void testCreateSchemalessDocumentsFromFileWithNullContent() {
+		assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile(null, "test.txt"))
+				.isInstanceOf(IllegalArgumentException.class).hasMessageContaining("null or empty");
+	}
+
+	@Test
+	void testCreateSchemalessDocumentsFromFileWithNullFilename() {
+		assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile("content", null))
+				.isInstanceOf(IllegalArgumentException.class).hasMessageContaining("null or empty");
+	}
+
+	@Test
+	void testCreateSchemalessDocumentsFromFileWithBlankContent() {
+		assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile("   ", "test.txt"))
+				.isInstanceOf(IllegalArgumentException.class).hasMessageContaining("null or empty");
+	}
+
+	@Test
+	void testCreateSchemalessDocumentsFromFilePreservesMultilineContent() throws Exception {
+		String content = """
+				Chapter 1: Introduction to Search
+
+				Apache Solr provides distributed indexing, replication, and
+				load-balanced querying. It is designed for scalability and
+				fault tolerance.
+
+				Chapter 2: Getting Started
+				""";
+
+		List<SolrInputDocument> documents = indexingDocumentCreator.createSchemalessDocumentsFromFile(content,
+				"guide.docx");
+
+		assertThat(documents).hasSize(1);
+		// Stored verbatim: blank lines and the trailing newline must survive unchanged.
+		assertThat(documents.getFirst().getFieldValue("content")).isEqualTo(content);
+	}
+
+}
diff --git a/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceDirectTest.java b/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceDirectTest.java
@@ -47,7 +47,7 @@ class IndexingServiceDirectTest {
 	@BeforeEach
 	void setUp() {
 		indexingDocumentCreator = new IndexingDocumentCreator(new XmlDocumentCreator(), new CsvDocumentCreator(),
-				new JsonDocumentCreator());
+				new JsonDocumentCreator(), new FileDocumentCreator());
 		indexingService = new IndexingService(solrClient, indexingDocumentCreator);
 	}