Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Solr MCP Server is a Spring AI Model Context Protocol (MCP) server that enables

- **Status:** Apache incubating project (v0.0.2-SNAPSHOT)
- **Java:** 25+ (centralized in build.gradle.kts)
- **Framework:** Spring Boot 3.5.8, Spring AI 1.1.2
- **Framework:** Spring Boot 3.5.8, Spring AI 1.1.4
- **License:** Apache 2.0

## Common Commands
Expand Down Expand Up @@ -44,7 +44,7 @@ PROFILES=http ./gradlew bootRun # HTTP mode
Four service classes expose MCP tools via `@McpTool` annotations:

- **SearchService** (`search/`) - Full-text search with filtering, faceting, sorting, pagination
- **IndexingService** (`indexing/`) - Document indexing supporting JSON, CSV, XML formats
- **IndexingService** (`indexing/`) - Document indexing supporting JSON, CSV, XML formats and file uploads (text extracted by chat client)
- **CollectionService** (`metadata/`) - List collections, get stats, health checks
- **SchemaService** (`metadata/`) - Schema introspection

Expand All @@ -53,6 +53,7 @@ Four service classes expose MCP tools via `@McpTool` annotations:
`indexing/documentcreator/` uses strategy pattern for format parsing:
- `SolrDocumentCreator` - Common interface
- `JsonDocumentCreator`, `CsvDocumentCreator`, `XmlDocumentCreator` - Format implementations
- `FileDocumentCreator` - File content indexing (text already extracted by AI chat client)
- `IndexingDocumentCreator` - Orchestrator that delegates to format-specific creators
- `FieldNameSanitizer` - Automatic field name validation for Solr compatibility

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,52 @@ public void indexXmlDocuments(@McpToolParam(description = "Solr collection to in
indexDocuments(collection, schemalessDoc);
}

/**
 * Indexes text that a chat client has extracted from an uploaded file.
 *
 * <p>
 * This tool does not parse the file itself. The AI chat client is expected to
 * have converted the upload (PDF, Word, etc.) into plain text already and to
 * pass that text here together with the original filename.
 *
 * <p>
 * The content and filename are handed to
 * {@link IndexingDocumentCreator#createSchemalessDocumentsFromFile(String, String)}
 * and the documents it returns are forwarded to
 * {@link #indexDocuments(String, List)}. The document built for the file
 * carries these fields:
 *
 * <ul>
 * <li><strong>id</strong> - Auto-generated UUID
 * <li><strong>content</strong> - The extracted text content
 * <li><strong>filename</strong> - The original filename
 * </ul>
 *
 * @param collection
 *            the name of the Solr collection to index into
 * @param content
 *            the text content extracted from the file
 * @param filename
 *            the original filename (e.g. "report.pdf")
 * @throws IllegalArgumentException
 *             if the document creator rejects {@code content} or
 *             {@code filename} as null or blank
 * @throws IOException
 *             if there are I/O errors during Solr communication
 * @throws SolrServerException
 *             if Solr server encounters errors during indexing
 */
@PreAuthorize("isAuthenticated()")
@McpTool(name = "index-file-document", description = "Index a document from file content into a Solr collection. "
        + "Use this when a user uploads a file (PDF, Word, Excel, etc.) and the text has "
        + "already been extracted by the chat client. Pass the extracted text as content "
        + "along with the original filename.")
public void indexFileDocument(@McpToolParam(description = "Solr collection to index into") String collection,
        @McpToolParam(description = "Text content extracted from the file") String content,
        @McpToolParam(description = "Original filename with extension (e.g. 'report.pdf')") String filename)
        throws IOException, SolrServerException {
    // Build the document(s) first, then push them through the shared indexing path.
    indexDocuments(collection, indexingDocumentCreator.createSchemalessDocumentsFromFile(content, filename));
}

/**
* Indexes a list of SolrInputDocument objects into a Solr collection using
* batch processing.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.mcp.server.indexing.documentcreator;

import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.UUID;
import org.apache.solr.common.SolrInputDocument;
import org.springframework.stereotype.Component;

/**
 * Creates a SolrInputDocument from text content extracted from a file.
 *
 * <p>
 * This creator handles documents uploaded through AI chat clients, where the
 * client has already extracted the text content from the original file (PDF,
 * Word, etc.). It produces a single SolrInputDocument containing the text
 * content and the original filename as metadata.
 *
 * <p>
 * This class does not implement {@link SolrDocumentCreator} because it requires
 * a filename parameter in addition to the content string.
 *
 * @see IndexingDocumentCreator#createSchemalessDocumentsFromFile(String,
 *      String)
 */
@Component
public class FileDocumentCreator {

    /** Largest accepted content size, measured in UTF-8 encoded bytes (10 MiB). */
    private static final int MAX_INPUT_SIZE_BYTES = 10 * 1024 * 1024;

    /**
     * Creates a SolrInputDocument from the provided text content and filename.
     *
     * <p>
     * The returned document has three fields: {@code id} (a random UUID string),
     * {@code content} (the text as given) and {@code filename} (as given).
     *
     * @param content
     *            the text content extracted from the file
     * @param filename
     *            the original filename (stored as metadata for search and
     *            filtering)
     * @return a list containing a single SolrInputDocument
     * @throws DocumentProcessingException
     *             if the content or the filename is null or blank, or if the
     *             content exceeds the size limit when encoded as UTF-8
     */
    public List<SolrInputDocument> create(String content, String filename) throws DocumentProcessingException {
        if (content == null || content.isBlank()) {
            throw new DocumentProcessingException("File content cannot be null or empty");
        }
        if (filename == null || filename.isBlank()) {
            throw new DocumentProcessingException("Filename cannot be null or empty");
        }
        if (exceedsSizeLimit(content)) {
            throw new DocumentProcessingException(
                    "Input too large: exceeds maximum size of " + MAX_INPUT_SIZE_BYTES + " bytes");
        }

        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", UUID.randomUUID().toString());
        doc.addField("content", content);
        doc.addField("filename", filename);
        return List.of(doc);
    }

    /**
     * Tells whether the UTF-8 encoding of {@code content} is larger than
     * {@link #MAX_INPUT_SIZE_BYTES}.
     */
    private static boolean exceedsSizeLimit(String content) {
        // Every UTF-16 code unit encodes to at least one UTF-8 byte, so a string
        // with more code units than the limit is already too large. Rejecting it
        // here avoids re-encoding (and allocating) an arbitrarily large payload
        // just to measure it.
        if (content.length() > MAX_INPUT_SIZE_BYTES) {
            return true;
        }
        // Within the fast-path bound the exact encoded size still has to be
        // measured, because non-ASCII characters take two to four bytes.
        return content.getBytes(StandardCharsets.UTF_8).length > MAX_INPUT_SIZE_BYTES;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,14 @@ public class IndexingDocumentCreator {

private final JsonDocumentCreator jsonDocumentCreator;

private final FileDocumentCreator fileDocumentCreator;

/**
* Creates the orchestrator from the creators it delegates to.
*
* <p>
* Each argument is stored as-is in the field of the same name; no validation
* or copying happens here.
*/
public IndexingDocumentCreator(XmlDocumentCreator xmlDocumentCreator, CsvDocumentCreator csvDocumentCreator,
JsonDocumentCreator jsonDocumentCreator) {
JsonDocumentCreator jsonDocumentCreator, FileDocumentCreator fileDocumentCreator) {
this.xmlDocumentCreator = xmlDocumentCreator;
this.csvDocumentCreator = csvDocumentCreator;
this.jsonDocumentCreator = jsonDocumentCreator;
this.fileDocumentCreator = fileDocumentCreator;
}

/**
Expand Down Expand Up @@ -134,4 +137,31 @@ public List<SolrInputDocument> createSchemalessDocumentsFromXml(String xml) thro

return xmlDocumentCreator.create(xml);
}

/**
 * Builds the Solr document for text that was extracted from a file.
 *
 * <p>
 * Intended for uploads coming through AI chat clients: the client has already
 * turned the original file into plain text, so no parsing happens here. Both
 * arguments are checked for null/blank values and then passed unchanged to
 * {@link FileDocumentCreator#create(String, String)}.
 *
 * @param content
 *            the text content extracted from the file
 * @param filename
 *            the original filename (stored as metadata)
 * @return list of SolrInputDocument objects ready for indexing
 * @throws IllegalArgumentException
 *             if {@code content} or {@code filename} is null or blank
 * @throws DocumentProcessingException
 *             if {@link FileDocumentCreator} rejects the input, for example
 *             because the content exceeds its size limit
 * @see FileDocumentCreator
 */
public List<SolrInputDocument> createSchemalessDocumentsFromFile(String content, String filename)
        throws DocumentProcessingException {
    final boolean contentMissing = content == null || content.isBlank();
    if (contentMissing) {
        throw new IllegalArgumentException("File content cannot be null or empty");
    }

    final boolean filenameMissing = filename == null || filename.isBlank();
    if (filenameMissing) {
        throw new IllegalArgumentException("Filename cannot be null or empty");
    }

    final List<SolrInputDocument> documents = fileDocumentCreator.create(content, filename);
    return documents;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.mcp.server.indexing;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.util.List;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.mcp.server.indexing.documentcreator.IndexingDocumentCreator;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.TestPropertySource;

/**
 * Integration test for file document indexing through
 * {@link IndexingDocumentCreator}.
 *
 * <p>
 * Covers the happy path (single document with {@code id}, {@code content} and
 * {@code filename} fields), verbatim preservation of multi-line content, and
 * the argument validation for null or blank content and filename. The failure
 * cases assert the full exception message so that each test proves which
 * argument was rejected.
 */
@SpringBootTest
@TestPropertySource(locations = "classpath:application.properties")
class FileIndexingTest {

    private static final String CONTENT_REJECTED = "File content cannot be null or empty";

    private static final String FILENAME_REJECTED = "Filename cannot be null or empty";

    @Autowired
    private IndexingDocumentCreator indexingDocumentCreator;

    @Test
    void testCreateSchemalessDocumentsFromFile() throws Exception {
        String content = "This is the text extracted from a PDF about Apache Solr full-text search.";

        List<SolrInputDocument> documents = indexingDocumentCreator.createSchemalessDocumentsFromFile(content,
                "test-document.pdf");

        assertThat(documents).hasSize(1);

        SolrInputDocument doc = documents.getFirst();
        assertThat(doc.getFieldValue("content")).isEqualTo(content);
        assertThat(doc.getFieldValue("filename")).isEqualTo("test-document.pdf");
        assertThat(doc.getFieldValue("id")).isNotNull();
    }

    @Test
    void testCreateSchemalessDocumentsFromFileWithNullContent() {
        assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile(null, "test.txt"))
                .isInstanceOf(IllegalArgumentException.class).hasMessage(CONTENT_REJECTED);
    }

    @Test
    void testCreateSchemalessDocumentsFromFileWithNullFilename() {
        assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile("content", null))
                .isInstanceOf(IllegalArgumentException.class).hasMessage(FILENAME_REJECTED);
    }

    // "Invalid" here means blank: whitespace-only content must be rejected like null.
    @Test
    void testCreateSchemalessDocumentsFromFileWithInvalidContent() {
        assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile(" ", "test.txt"))
                .isInstanceOf(IllegalArgumentException.class).hasMessage(CONTENT_REJECTED);
    }

    @Test
    void testCreateSchemalessDocumentsFromFileWithBlankFilename() {
        assertThatThrownBy(() -> indexingDocumentCreator.createSchemalessDocumentsFromFile("content", " "))
                .isInstanceOf(IllegalArgumentException.class).hasMessage(FILENAME_REJECTED);
    }

    @Test
    void testCreateSchemalessDocumentsFromFilePreservesMultilineContent() throws Exception {
        String content = """
                Chapter 1: Introduction to Search

                Apache Solr provides distributed indexing, replication, and
                load-balanced querying. It is designed for scalability and
                fault tolerance.

                Chapter 2: Getting Started
                """;

        List<SolrInputDocument> documents = indexingDocumentCreator.createSchemalessDocumentsFromFile(content,
                "guide.docx");

        assertThat(documents).hasSize(1);

        Object indexedContent = documents.getFirst().getFieldValue("content");
        // Equality, not just substring checks: line breaks and blank lines must survive.
        assertThat(indexedContent).isEqualTo(content);
        assertThat(indexedContent.toString()).contains("Chapter 1").contains("scalability");
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class IndexingServiceDirectTest {
/**
* Rebuilds the objects under test before each test method.
*
* <p>
* The document creators are instantiated directly with {@code new}, and the
* service is constructed from this test's {@code solrClient} field and the
* freshly built {@code indexingDocumentCreator}.
*/
@BeforeEach
void setUp() {
indexingDocumentCreator = new IndexingDocumentCreator(new XmlDocumentCreator(), new CsvDocumentCreator(),
new JsonDocumentCreator());
new JsonDocumentCreator(), new FileDocumentCreator());
indexingService = new IndexingService(solrClient, indexingDocumentCreator);
}

Expand Down
Loading