Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"java.configuration.updateBuildConfiguration": "interactive"
"java.configuration.updateBuildConfiguration": "interactive",
"java.compile.nullAnalysis.mode": "automatic"
}
8 changes: 8 additions & 0 deletions src/main/java/chatbot/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import chatbot.cache.WolframRepository;
import chatbot.lib.api.SPARQL;
import chatbot.lib.api.WikidataSPARQL;
import chatbot.rivescript.RiveScriptBot;
import codeanticode.eliza.ElizaMain;
import com.cloudant.client.api.CloudantClient;
Expand Down Expand Up @@ -113,6 +114,7 @@ public static class Helper {
private WolframRepository wolframRepository;
private String tmdbApiKey;
private SPARQL sparql;
private WikidataSPARQL wikidataSparql;
private JLanguageTool languageTool;

@Autowired
Expand Down Expand Up @@ -154,6 +156,8 @@ public Helper(final CloudantClient cloudantClient,
? new SPARQL(explorerDB)
: SPARQL.disabled();

wikidataSparql = new WikidataSPARQL();

languageTool = new JLanguageTool(new AmericanEnglish());
for (Rule rule : languageTool.getAllActiveRules()) {
if (rule instanceof SpellingCheckRule) {
Expand Down Expand Up @@ -199,6 +203,10 @@ public SPARQL getSparql() {
return sparql;
}

/**
 * Returns the shared {@link WikidataSPARQL} service used to query the
 * Wikidata SPARQL endpoint for entity information.
 * The instance is created once in the Helper constructor.
 */
public WikidataSPARQL getWikidataSparql() {
return wikidataSparql;
}

/**
 * Returns the shared {@link JLanguageTool} instance (American English)
 * constructed in the Helper constructor.
 */
public JLanguageTool getLanguageTool() {
return languageTool;
}
Expand Down
21 changes: 21 additions & 0 deletions src/main/java/chatbot/lib/Utility.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,25 @@ public static String convertDBpediaToWikipediaURL(String url) {
String[] urlsParts = url.split("/");
return "https://en.wikipedia.org/wiki/" + urlsParts[urlsParts.length - 1];
}

/**
 * Checks whether the given URI points into the Wikidata entity namespace.
 *
 * @param uri the URI to inspect; may be null
 * @return true if the URI starts with the http or https Wikidata entity prefix
 */
public static boolean isWikidataURI(String uri) {
    if (uri == null) {
        return false;
    }
    return uri.startsWith("http://www.wikidata.org/entity/")
            || uri.startsWith("https://www.wikidata.org/entity/");
}

/**
 * Checks whether the given URI points into the DBpedia resource namespace.
 *
 * @param uri the URI to inspect; may be null
 * @return true if the URI starts with the http or https DBpedia resource prefix
 */
public static boolean isDBpediaURI(String uri) {
    if (uri == null) {
        return false;
    }
    return uri.startsWith("http://dbpedia.org/resource/")
            || uri.startsWith("https://dbpedia.org/resource/");
}

/**
 * Extracts the Wikidata entity ID (e.g. "Q42") from a Wikidata URI.
 *
 * The last path segment is accepted only if it matches the Wikidata ID
 * shape (Q/P/L prefix followed by digits), so arbitrary URIs return null.
 *
 * @param uri the URI to extract from; may be null
 * @return the entity ID, or null when the URI is null or its last segment
 *         is not a valid Wikidata entity ID
 */
public static String extractWikidataEntityId(String uri) {
    if (uri == null) {
        return null;
    }
    String[] parts = uri.split("/");
    // split() returns an EMPTY array for inputs made only of '/' characters;
    // guard against ArrayIndexOutOfBoundsException before indexing.
    if (parts.length == 0) {
        return null;
    }
    String id = parts[parts.length - 1];
    return id.matches("^[QPL]\\d+$") ? id : null;
}
}
152 changes: 152 additions & 0 deletions src/main/java/chatbot/lib/api/WikidataSPARQL.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package chatbot.lib.api;

import chatbot.lib.Constants;
import chatbot.lib.Utility;
import chatbot.lib.request.TemplateType;
import chatbot.lib.response.ResponseData;
import chatbot.lib.response.ResponseType;
import org.apache.jena.query.*;
import org.apache.jena.sparql.engine.http.QueryEngineHTTP;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;

/**
 * Service for querying the public Wikidata SPARQL endpoint to retrieve
 * entity information.
 * Mirrors the interface of the DBpedia {@code SPARQL} class but targets
 * Wikidata.
 */
public class WikidataSPARQL {
    private static final Logger logger = LoggerFactory.getLogger(WikidataSPARQL.class);

    private static final String ENDPOINT = "https://query.wikidata.org/sparql";

    // Common prefixes prepended to every query body by buildQuery().
    private static final String PREFIXES = "PREFIX wd: <http://www.wikidata.org/entity/>\n" +
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n" +
            "PREFIX wikibase: <http://wikiba.se/ontology#>\n" +
            "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" +
            "PREFIX schema: <http://schema.org/>\n";

    public WikidataSPARQL() {
    }

    /**
     * Prepends the standard Wikidata prefixes to the given SPARQL query body.
     */
    public String buildQuery(String query) {
        return PREFIXES + query;
    }

    /**
     * Retrieves entity information from Wikidata for the given URI.
     *
     * @param uri a full Wikidata entity URI (e.g. http://www.wikidata.org/entity/Q42)
     * @return a ResponseData card with label, description, image, Wikipedia
     *         link and Wikidata link, or null when the URI is not a Wikidata
     *         entity URI, the entity has no English label, or the query fails
     */
    public ResponseData getEntityInformation(String uri) {
        // extractWikidataEntityId validates the ID against ^[QPL]\d+$, so the
        // interpolation into the query string below cannot inject SPARQL.
        String entityId = Utility.extractWikidataEntityId(uri);
        if (entityId == null) {
            return null;
        }

        String query = buildQuery(
                "SELECT ?label ?description ?image ?articleEN WHERE {\n" +
                "  wd:" + entityId + " rdfs:label ?label . FILTER(lang(?label) = 'en') .\n" +
                "  OPTIONAL { wd:" + entityId
                + " schema:description ?description . FILTER(lang(?description) = 'en') }\n" +
                "  OPTIONAL { wd:" + entityId + " wdt:P18 ?image }\n" +
                "  OPTIONAL {\n" +
                "    ?articleEN schema:about wd:" + entityId + " ;\n" +
                "               schema:isPartOf <https://en.wikipedia.org/> .\n" +
                "  }\n" +
                "} LIMIT 1");

        ResponseData responseData = null;

        // QueryExecution is AutoCloseable; try-with-resources releases the
        // HTTP connection even when query creation or execution fails.
        try (QueryExecution queryExecution = executeQuery(query)) {
            Iterator<QuerySolution> results = queryExecution.execSelect();
            if (results.hasNext()) {
                QuerySolution result = results.next();
                responseData = new ResponseData();

                // Label is a required pattern in the query, so it is present
                // in every solution.
                String label = result.get("label").asLiteral().getString();
                responseData.setTitle(label);

                // Description (optional).
                if (result.get("description") != null) {
                    responseData.setText(result.get("description").asLiteral().getString());
                }

                // Thumbnail / Image (optional, wdt:P18).
                if (result.get("image") != null) {
                    responseData.setImage(result.get("image").toString());
                }

                // English Wikipedia article link (optional).
                if (result.get("articleEN") != null) {
                    responseData.addButton(new ResponseData.Button("View in Wikipedia", ResponseType.BUTTON_LINK,
                            result.get("articleEN").toString()));
                }

                // Always link back to the Wikidata page itself.
                responseData.addButton(new ResponseData.Button("View in Wikidata", ResponseType.BUTTON_LINK, uri));

                // Learn More button (reuses the existing template mechanism).
                responseData.addButton(new ResponseData.Button("Learn More", ResponseType.BUTTON_PARAM,
                        TemplateType.LEARN_MORE + Utility.STRING_SEPARATOR + uri + Utility.STRING_SEPARATOR + label));
            }
        } catch (Exception e) {
            logger.error("Error querying Wikidata for entity: " + uri, e);
        }
        return responseData;
    }

    /**
     * Retrieves the English label for a Wikidata entity.
     *
     * @param uri a full Wikidata entity URI
     * @return the English label, or null when the URI is not a Wikidata
     *         entity URI, no English label exists, or the query fails
     */
    public String getLabel(String uri) {
        String entityId = Utility.extractWikidataEntityId(uri);
        if (entityId == null) {
            return null;
        }

        String query = buildQuery(
                "SELECT ?label WHERE {\n" +
                "  wd:" + entityId + " rdfs:label ?label . FILTER(lang(?label) = 'en') .\n" +
                "} LIMIT 1");

        String label = null;

        // Acquire the QueryExecution inside the try so a query-parse failure
        // is logged here instead of propagating (consistent with
        // getEntityInformation), and the execution is always closed.
        try (QueryExecution queryExecution = executeQuery(query)) {
            Iterator<QuerySolution> results = queryExecution.execSelect();
            if (results.hasNext()) {
                label = results.next().get("label").asLiteral().getString();
            }
        } catch (Exception e) {
            logger.error("Error querying Wikidata for label: " + uri, e);
        }
        return label;
    }

    /**
     * Builds a QueryExecution for the given SPARQL query string against the
     * Wikidata endpoint. Callers are responsible for closing it.
     */
    public QueryExecution executeQuery(String queryString) {
        logger.info("Wikidata SPARQL Query is:\n{}", queryString);
        Query query = QueryFactory.create(queryString);
        QueryEngineHTTP queryEngine = (QueryEngineHTTP) QueryExecutionFactory.sparqlService(ENDPOINT, query);
        queryEngine.addParam("timeout", String.valueOf(Constants.API_TIMEOUT));
        // Request JSON results from the endpoint.
        // NOTE(review): the Wikidata usage policy asks for a descriptive
        // User-Agent header; this code only sends Jena's default agent —
        // consider configuring a custom one on the underlying HTTP client.
        queryEngine.addParam("format", "json");
        return queryEngine;
    }
}
67 changes: 49 additions & 18 deletions src/main/java/chatbot/lib/api/qa/QANARY.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@
import java.util.List;
import java.util.Map;

/**
* Created by ramgathreya on 7/1/17.
*/
public class QANARY {
private static final Logger logger = LoggerFactory.getLogger(QANARY.class);
private static final String URL = "http://qanswer-core1.univ-st-etienne.fr/api/gerbil";
Expand All @@ -36,50 +33,55 @@ public QANARY() {
this.client = HttpClientBuilder.create().setDefaultRequestConfig(requestConfig).build();
}

private String makeRequest(String question) {
private String makeRequest(String question, String knowledgeBase) {
try {
HttpPost httpPost = new HttpPost(URL);
List<NameValuePair> params = new ArrayList<>();
params.add(new BasicNameValuePair("query", question));
// params.add(new BasicNameValuePair("lang", "it"));
params.add(new BasicNameValuePair("kb", "dbpedia"));
params.add(new BasicNameValuePair("kb", knowledgeBase));

UrlEncodedFormEntity entity = new UrlEncodedFormEntity(params, Consts.UTF_8);
httpPost.setEntity(entity);

HttpResponse response = client.execute(httpPost);

// Error Scenario
if(response.getStatusLine().getStatusCode() >= 400) {
logger.error("QANARY Server could not answer due to: " + response.getStatusLine());
if (response.getStatusLine().getStatusCode() >= 400) {
logger.error("QANARY Server could not answer for kb=" + knowledgeBase + " due to: "
+ response.getStatusLine());
return null;
}

return EntityUtils.toString(response.getEntity());
}
catch(Exception e) {
logger.error(e.getMessage());
} catch (Exception e) {
logger.error("QANARY request failed for kb=" + knowledgeBase + ": " + e.getMessage());
}
return null;
}

// Calls QANARY Service then returns resulting data as a list of Data Objects
public QAService.Data search(String question) throws Exception {
private QAService.Data parseResponse(String response) throws Exception {
QAService.Data data = new QAService.Data();
String response = makeRequest(question);
if(response != null) {
if (response != null) {
ObjectMapper mapper = new ObjectMapper();
JsonNode rootNode = mapper.readTree(response);
JsonNode answers = mapper.readTree(rootNode.findValue("questions").get(0).get("question").get("answers").getTextValue());
JsonNode questions = rootNode.findValue("questions");
if (questions == null || !questions.isArray() || questions.size() == 0) {
return data;
}
JsonNode questionNode = questions.get(0).get("question");
if (questionNode == null || questionNode.get("answers") == null) {
return data;
}
JsonNode answers = mapper.readTree(questionNode.get("answers").getTextValue());

if (answers != null) {
JsonNode bindings = answers.get("results").get("bindings");
for(JsonNode binding : bindings) {
for (JsonNode binding : bindings) {
Iterator<Map.Entry<String, JsonNode>> nodes = binding.getFields();
while (nodes.hasNext()) {
Map.Entry<String, JsonNode> entry = nodes.next();
JsonNode value = entry.getValue();
switch(value.get("type").getTextValue()) {
switch (value.get("type").getTextValue()) {
case "uri":
data.addURI(value.get("value").getTextValue());
break;
Expand All @@ -94,4 +96,33 @@ public QAService.Data search(String question) throws Exception {
return data;
}

public QAService.Data search(String question) throws Exception {

QAService.Data data = new QAService.Data();

// Query DBpedia KB
try {
QAService.Data dbpediaData = parseResponse(makeRequest(question, "dbpedia"));
data.addData(dbpediaData, false);
} catch (Exception e) {
logger.error("DBpedia QANARY query failed: " + e.getMessage());
}

// If DBpedia yielded an answer, return early so we don't pay
// the extra latency waiting for Wikidata.
if (!data.getUris().isEmpty() || !data.getLiterals().isEmpty()) {
return data;
}
Comment on lines +111 to +115
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don’t short-circuit on DBpedia literals.

At Line 113, returning when literals is non-empty skips the Wikidata lookup even though the downstream grounding path only becomes useful when uris are present. In src/main/java/chatbot/lib/api/qa/QAService.java:39-48, WolframAlpha can overwrite QANARY literals later, and in src/main/java/chatbot/lib/handlers/NLHandler.java:94-147 literal responses bypass URI enrichment entirely. A DBpedia literal here can therefore suppress the Wikidata URI that this PR is trying to add.

Minimal fix
-        if (!data.getUris().isEmpty() || !data.getLiterals().isEmpty()) {
+        if (!data.getUris().isEmpty()) {
             return data;
         }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// If DBpedia yielded an answer, return early so we don't pay
// the extra latency waiting for Wikidata.
if (!data.getUris().isEmpty() || !data.getLiterals().isEmpty()) {
return data;
}
// If DBpedia yielded an answer, return early so we don't pay
// the extra latency waiting for Wikidata.
if (!data.getUris().isEmpty()) {
return data;
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/main/java/chatbot/lib/api/qa/QANARY.java` around lines 111 - 115, The
current early-return in QANARY.java uses data.getUris() || data.getLiterals(),
which causes a DBpedia literal to short-circuit and skip the Wikidata lookup;
change the logic to only short-circuit when data.getUris() is non-empty (i.e.,
only return early if data.getUris().isEmpty() is false) so that a DBpedia
literal does not prevent running the Wikidata enrichment; update the condition
around the return (referencing data.getUris() and data.getLiterals() and the
surrounding block) so literals do not bypass the Wikidata path.


// Query Wikidata KB
try {
QAService.Data wikidataData = parseResponse(makeRequest(question, "wikidata"));
data.addData(wikidataData, false);
} catch (Exception e) {
logger.error("Wikidata QANARY query failed, continuing with DBpedia results only: " + e.getMessage());
}

return data;
}

}
33 changes: 23 additions & 10 deletions src/main/java/chatbot/lib/handlers/NLHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import chatbot.Application;
import chatbot.lib.Utility;
import chatbot.lib.api.WikidataSPARQL;
import chatbot.lib.api.StatusCheckService;
import chatbot.lib.api.qa.QAService;
import chatbot.lib.api.SPARQL;
Expand Down Expand Up @@ -111,21 +112,33 @@ else if(uris.size() > 0) {
break;
}

count = helper.getSparql().isDisambiguationPage(uri);
processedResponse.setResponseType(SPARQL.ProcessedResponse.RESPONSE_CAROUSEL);

// Not a disambiguation page
if(count == 0) {
ResponseData responseData = helper.getSparql().getEntityInformation(uri);
// Route Wikidata URIs to WikidataSPARQL
if(Utility.isWikidataURI(uri)) {
processedResponse.setResponseType(SPARQL.ProcessedResponse.RESPONSE_CAROUSEL);
WikidataSPARQL wikidataSparql = helper.getWikidataSparql();
ResponseData responseData = wikidataSparql.getEntityInformation(uri);
if (responseData != null) {
processedResponse.addResponseData(responseData);
}
}
// Disambiguation page
// Route DBpedia URIs to DBpedia SPARQL (existing behavior)
else {
processedResponse.getResponseInfo().setUri(uri).setCount(count).setQueryResultType(SPARQL.ResponseInfo.DISAMBIGUATION_PAGE).setOffset(0).setLimit(ResponseData.MAX_DATA_SIZE);
processedResponse.setResponseData(helper.getSparql().getDisambiguatedEntities(uri, 0, ResponseData.MAX_DATA_SIZE));
return processedResponse;
count = helper.getSparql().isDisambiguationPage(uri);
processedResponse.setResponseType(SPARQL.ProcessedResponse.RESPONSE_CAROUSEL);

// Not a disambiguation page
if(count == 0) {
ResponseData responseData = helper.getSparql().getEntityInformation(uri);
if (responseData != null) {
processedResponse.addResponseData(responseData);
}
}
// Disambiguation page
else {
processedResponse.getResponseInfo().setUri(uri).setCount(count).setQueryResultType(SPARQL.ResponseInfo.DISAMBIGUATION_PAGE).setOffset(0).setLimit(ResponseData.MAX_DATA_SIZE);
processedResponse.setResponseData(helper.getSparql().getDisambiguatedEntities(uri, 0, ResponseData.MAX_DATA_SIZE));
return processedResponse;
}
}
}
}
Expand Down
Loading