diff --git a/AGENTS.md b/AGENTS.md
index 91f9d58..b8da0d7 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -8,7 +8,7 @@ Solr MCP Server is a Spring AI Model Context Protocol (MCP) server that enables
- **Status:** Apache incubating project (v0.0.2-SNAPSHOT)
- **Java:** 25+ (centralized in build.gradle.kts)
-- **Framework:** Spring Boot 3.5.8, Spring AI 1.1.2
+- **Framework:** Spring Boot 3.5.8, Spring AI 1.1.4
- **License:** Apache 2.0
## Common Commands
@@ -44,7 +44,7 @@ PROFILES=http ./gradlew bootRun # HTTP mode
Four service classes expose MCP tools via `@McpTool` annotations:
- **SearchService** (`search/`) - Full-text search with filtering, faceting, sorting, pagination
-- **IndexingService** (`indexing/`) - Document indexing supporting JSON, CSV, XML formats
+- **IndexingService** (`indexing/`) - Document indexing supporting JSON, CSV, and XML formats, plus file uploads (text already extracted by the AI chat client)
- **CollectionService** (`metadata/`) - List collections, get stats, health checks
- **SchemaService** (`metadata/`) - Schema introspection
@@ -53,6 +53,7 @@ Four service classes expose MCP tools via `@McpTool` annotations:
`indexing/documentcreator/` uses strategy pattern for format parsing:
- `SolrDocumentCreator` - Common interface
- `JsonDocumentCreator`, `CsvDocumentCreator`, `XmlDocumentCreator` - Format implementations
+- `FileDocumentCreator` - File content indexing (text already extracted by AI chat client)
- `IndexingDocumentCreator` - Orchestrator that delegates to format-specific creators
- `FieldNameSanitizer` - Automatic field name validation for Solr compatibility
diff --git a/src/main/java/org/apache/solr/mcp/server/indexing/IndexingService.java b/src/main/java/org/apache/solr/mcp/server/indexing/IndexingService.java
index e52ca7b..ff7de74 100644
--- a/src/main/java/org/apache/solr/mcp/server/indexing/IndexingService.java
+++ b/src/main/java/org/apache/solr/mcp/server/indexing/IndexingService.java
@@ -356,6 +356,52 @@ public void indexXmlDocuments(@McpToolParam(description = "Solr collection to in
indexDocuments(collection, schemalessDoc);
}
+ /**
+ * Indexes a document from file content into a Solr collection.
+ *
+ *
+ * This method accepts text content that has been extracted from a file by the
+ * AI chat client. When a user uploads a file (PDF, Word, etc.) through their
+ * chat client, the client extracts the text and passes it to this tool along
+ * with the original filename.
+ *
+ *
+ * A single SolrInputDocument is created with the following fields:
+ *
+ *
documents = indexingDocumentCreator.createSchemalessDocumentsFromFile(content,
+ filename);
+ indexDocuments(collection, documents);
+ }
+
/**
* Indexes a list of SolrInputDocument objects into a Solr collection using
* batch processing.
diff --git a/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/FileDocumentCreator.java b/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/FileDocumentCreator.java
new file mode 100644
index 0000000..3235517
--- /dev/null
+++ b/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/FileDocumentCreator.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.mcp.server.indexing.documentcreator;
+
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.UUID;
+import org.apache.solr.common.SolrInputDocument;
+import org.springframework.stereotype.Component;
+
+/**
+ * Creates a SolrInputDocument from text content extracted from a file.
+ *
+ * <p>
+ * This creator handles documents uploaded through AI chat clients, where the
+ * client has already extracted the text content from the original file (PDF,
+ * Word, etc.). It produces a single SolrInputDocument containing the text
+ * content and the original filename as metadata.
+ *
+ * <p>
+ * This class does not implement {@link SolrDocumentCreator} because it requires
+ * a filename parameter in addition to the content string.
+ *
+ * @see IndexingDocumentCreator#createSchemalessDocumentsFromFile(String,
+ *      String)
+ */
+@Component
+public class FileDocumentCreator {
+
+    /** Maximum accepted size of the content, measured in UTF-8 encoded bytes. */
+    private static final int MAX_INPUT_SIZE_BYTES = 10 * 1024 * 1024;
+
+    /**
+     * Creates a SolrInputDocument from the provided text content and filename.
+     *
+     * <p>
+     * The document receives a random UUID in {@code id}, the text in
+     * {@code content}, and the name in {@code filename}.
+     *
+     * @param content
+     *            the text content extracted from the file
+     * @param filename
+     *            the original filename (stored as metadata for search and
+     *            filtering)
+     * @return a list containing a single SolrInputDocument
+     * @throws DocumentProcessingException
+     *             if the content or the filename is null or blank, or if the
+     *             content exceeds the size limit
+     */
+    public List<SolrInputDocument> create(String content, String filename) throws DocumentProcessingException {
+        if (content == null || content.isBlank()) {
+            throw new DocumentProcessingException("File content cannot be null or empty");
+        }
+        if (filename == null || filename.isBlank()) {
+            throw new DocumentProcessingException("Filename cannot be null or empty");
+        }
+        if (exceedsMaxSize(content)) {
+            throw new DocumentProcessingException(
+                    "Input too large: exceeds maximum size of " + MAX_INPUT_SIZE_BYTES + " bytes");
+        }
+
+        SolrInputDocument doc = new SolrInputDocument();
+        doc.addField("id", UUID.randomUUID().toString());
+        doc.addField("content", content);
+        doc.addField("filename", filename);
+        return List.of(doc);
+    }
+
+    /**
+     * Reports whether the UTF-8 encoding of the text is larger than
+     * {@link #MAX_INPUT_SIZE_BYTES}.
+     */
+    private static boolean exceedsMaxSize(String text) {
+        // UTF-8 never needs fewer bytes than the string has UTF-16 code units, so
+        // an over-long string is rejected without allocating its encoded copy.
+        if (text.length() > MAX_INPUT_SIZE_BYTES) {
+            return true;
+        }
+        return text.getBytes(StandardCharsets.UTF_8).length > MAX_INPUT_SIZE_BYTES;
+    }
+
+}
diff --git a/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/IndexingDocumentCreator.java b/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/IndexingDocumentCreator.java
index f20489a..0400926 100644
--- a/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/IndexingDocumentCreator.java
+++ b/src/main/java/org/apache/solr/mcp/server/indexing/documentcreator/IndexingDocumentCreator.java
@@ -62,11 +62,14 @@ public class IndexingDocumentCreator {
private final JsonDocumentCreator jsonDocumentCreator;
+ private final FileDocumentCreator fileDocumentCreator;
+
public IndexingDocumentCreator(XmlDocumentCreator xmlDocumentCreator, CsvDocumentCreator csvDocumentCreator,
- JsonDocumentCreator jsonDocumentCreator) {
+ JsonDocumentCreator jsonDocumentCreator, FileDocumentCreator fileDocumentCreator) {
this.xmlDocumentCreator = xmlDocumentCreator;
this.csvDocumentCreator = csvDocumentCreator;
this.jsonDocumentCreator = jsonDocumentCreator;
+ this.fileDocumentCreator = fileDocumentCreator;
}
/**
@@ -134,4 +137,31 @@ public List createSchemalessDocumentsFromXml(String xml) thro
return xmlDocumentCreator.create(xml);
}
+
+    /**
+     * Creates a SolrInputDocument from text content extracted from a file.
+     *
+     * <p>
+     * This method is intended for documents uploaded through AI chat clients, where
+     * the client has already extracted the text content from the original file.
+     * Both arguments are checked here before the call is delegated to the
+     * {@link FileDocumentCreator}.
+     *
+     * @param content
+     *            the text content extracted from the file
+     * @param filename
+     *            the original filename (stored as metadata)
+     * @return list of SolrInputDocument objects ready for indexing
+     * @throws IllegalArgumentException
+     *             if the content or the filename is null or blank
+     * @throws DocumentProcessingException
+     *             if the file document creator rejects the input
+     * @see FileDocumentCreator
+     */
+    public List<SolrInputDocument> createSchemalessDocumentsFromFile(String content, String filename)
+            throws DocumentProcessingException {
+        if (content == null || content.isBlank()) {
+            throw new IllegalArgumentException("File content cannot be null or empty");
+        }
+        if (filename == null || filename.isBlank()) {
+            throw new IllegalArgumentException("Filename cannot be null or empty");
+        }
+        return fileDocumentCreator.create(content, filename);
+    }
}
diff --git a/src/test/java/org/apache/solr/mcp/server/indexing/FileIndexingTest.java b/src/test/java/org/apache/solr/mcp/server/indexing/FileIndexingTest.java
new file mode 100644
index 0000000..3185e3d
--- /dev/null
+++ b/src/test/java/org/apache/solr/mcp/server/indexing/FileIndexingTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.mcp.server.indexing;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.util.List;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.mcp.server.indexing.documentcreator.IndexingDocumentCreator;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.TestPropertySource;
+
+/**
+ * Integration test for file document indexing through
+ * {@link IndexingDocumentCreator}.
+ *
+ * <p>
+ * Covers the single-document result produced for valid input and the
+ * {@link IllegalArgumentException} raised for null or blank arguments.
+ */
+@SpringBootTest
+@TestPropertySource(locations = "classpath:application.properties")
+class FileIndexingTest {
+
+    @Autowired
+    private IndexingDocumentCreator indexingDocumentCreator;
+
+    /** Valid input yields one document carrying id, content and filename. */
+    @Test
+    void testCreateSchemalessDocumentsFromFile() throws Exception {
+        String content = "This is the text extracted from a PDF about Apache Solr full-text search.";
+
+        List<SolrInputDocument> documents = indexingDocumentCreator.createSchemalessDocumentsFromFile(content,
+                "test-document.pdf");
+
+        assertThat(documents).hasSize(1);
+
+        SolrInputDocument doc = documents.getFirst();
+        assertThat(doc.getFieldValue("content")).isEqualTo(content);
+        assertThat(doc.getFieldValue("filename")).isEqualTo("test-document.pdf");
+        assertThat(doc.getFieldValue("id")).isNotNull();
+    }
+
+    /** Null content is rejected before any document is built. */
+    @Test
+    void testCreateSchemalessDocumentsFromFileWithNullContent() {
+        assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile(null, "test.txt"))
+                .isInstanceOf(IllegalArgumentException.class).hasMessageContaining("null or empty");
+    }
+
+    /** Null filename is rejected before any document is built. */
+    @Test
+    void testCreateSchemalessDocumentsFromFileWithNullFilename() {
+        assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile("content", null))
+                .isInstanceOf(IllegalArgumentException.class).hasMessageContaining("null or empty");
+    }
+
+    /** Whitespace-only content is treated the same as missing content. */
+    @Test
+    void testCreateSchemalessDocumentsFromFileWithInvalidContent() {
+        assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile(" ", "test.txt"))
+                .isInstanceOf(IllegalArgumentException.class).hasMessageContaining("null or empty");
+    }
+
+    /** Whitespace-only filename is treated the same as a missing filename. */
+    @Test
+    void testCreateSchemalessDocumentsFromFileWithBlankFilename() {
+        assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile("content", " "))
+                .isInstanceOf(IllegalArgumentException.class).hasMessageContaining("null or empty");
+    }
+
+    /** Multi-line text is stored unchanged in the content field. */
+    @Test
+    void testCreateSchemalessDocumentsFromFilePreservesMultilineContent() throws Exception {
+        String content = """
+                Chapter 1: Introduction to Search
+
+                Apache Solr provides distributed indexing, replication, and
+                load-balanced querying. It is designed for scalability and
+                fault tolerance.
+
+                Chapter 2: Getting Started
+                """;
+
+        List<SolrInputDocument> documents = indexingDocumentCreator.createSchemalessDocumentsFromFile(content,
+                "guide.docx");
+
+        assertThat(documents).hasSize(1);
+
+        SolrInputDocument doc = documents.getFirst();
+        // Equality (not just substring checks) is what actually proves preservation.
+        assertThat(doc.getFieldValue("content")).isEqualTo(content);
+        assertThat(doc.getFieldValue("content").toString()).contains("Chapter 1").contains("scalability");
+    }
+
+}
diff --git a/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceDirectTest.java b/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceDirectTest.java
index 4d57b34..75ffa74 100644
--- a/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceDirectTest.java
+++ b/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceDirectTest.java
@@ -47,7 +47,7 @@ class IndexingServiceDirectTest {
@BeforeEach
void setUp() {
indexingDocumentCreator = new IndexingDocumentCreator(new XmlDocumentCreator(), new CsvDocumentCreator(),
- new JsonDocumentCreator());
+ new JsonDocumentCreator(), new FileDocumentCreator());
indexingService = new IndexingService(solrClient, indexingDocumentCreator);
}
diff --git a/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceTest.java b/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceTest.java
index c0bec69..1eb9e1b 100644
--- a/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceTest.java
+++ b/src/test/java/org/apache/solr/mcp/server/indexing/IndexingServiceTest.java
@@ -31,6 +31,7 @@
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.mcp.server.TestcontainersConfiguration;
import org.apache.solr.mcp.server.indexing.documentcreator.CsvDocumentCreator;
+import org.apache.solr.mcp.server.indexing.documentcreator.FileDocumentCreator;
import org.apache.solr.mcp.server.indexing.documentcreator.IndexingDocumentCreator;
import org.apache.solr.mcp.server.indexing.documentcreator.JsonDocumentCreator;
import org.apache.solr.mcp.server.indexing.documentcreator.XmlDocumentCreator;
@@ -77,8 +78,9 @@ void setUp() throws Exception {
CsvDocumentCreator csvDocumentCreator = new CsvDocumentCreator();
JsonDocumentCreator jsonDocumentCreator = new JsonDocumentCreator();
+ FileDocumentCreator fileDocumentCreator = new FileDocumentCreator();
indexingDocumentCreator = new IndexingDocumentCreator(xmlDocumentCreator, csvDocumentCreator,
- jsonDocumentCreator);
+ jsonDocumentCreator, fileDocumentCreator);
indexingService = new IndexingService(solrClient, indexingDocumentCreator);
searchService = new SearchService(solrClient);
@@ -757,6 +759,50 @@ void testDirectSanitizeFieldName() throws Exception {
assertEquals("Value 6", doc.getFieldValue("trailing_underscores"));
assertEquals("Value 7", doc.getFieldValue("multiple_underscores"));
}
+
+ @Test
+ void testIndexFileDocumentAndSearch() throws Exception {
+ String content = "Apache Solr provides distributed indexing and search with unique_file_test_marker_42";
+ String filename = "solr-guide.pdf";
+
+ indexingService.indexFileDocument(COLLECTION_NAME, content, filename);
+
+ // Search by content
+ SearchResponse result = searchService.search(COLLECTION_NAME, "content:unique_file_test_marker_42", null, null,
+ null, null, null);
+
+ assertNotNull(result);
+ List