diff --git a/.vscode/settings.json b/.vscode/settings.json index c5f3f6b..b84f89c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { - "java.configuration.updateBuildConfiguration": "interactive" + "java.configuration.updateBuildConfiguration": "interactive", + "java.compile.nullAnalysis.mode": "automatic" } \ No newline at end of file diff --git a/src/main/java/chatbot/Application.java b/src/main/java/chatbot/Application.java index 7a9650f..7ac3484 100644 --- a/src/main/java/chatbot/Application.java +++ b/src/main/java/chatbot/Application.java @@ -2,6 +2,7 @@ import chatbot.cache.WolframRepository; import chatbot.lib.api.SPARQL; +import chatbot.lib.api.WikidataSPARQL; import chatbot.rivescript.RiveScriptBot; import codeanticode.eliza.ElizaMain; import com.cloudant.client.api.CloudantClient; @@ -113,6 +114,7 @@ public static class Helper { private WolframRepository wolframRepository; private String tmdbApiKey; private SPARQL sparql; + private WikidataSPARQL wikidataSparql; private JLanguageTool languageTool; @Autowired @@ -154,6 +156,8 @@ public Helper(final CloudantClient cloudantClient, ? 
new SPARQL(explorerDB) : SPARQL.disabled(); + wikidataSparql = new WikidataSPARQL(); + languageTool = new JLanguageTool(new AmericanEnglish()); for (Rule rule : languageTool.getAllActiveRules()) { if (rule instanceof SpellingCheckRule) { @@ -199,6 +203,10 @@ public SPARQL getSparql() { return sparql; } + public WikidataSPARQL getWikidataSparql() { + return wikidataSparql; + } + public JLanguageTool getLanguageTool() { return languageTool; } diff --git a/src/main/java/chatbot/lib/Utility.java b/src/main/java/chatbot/lib/Utility.java index 5680620..9d6479e 100644 --- a/src/main/java/chatbot/lib/Utility.java +++ b/src/main/java/chatbot/lib/Utility.java @@ -104,4 +104,25 @@ public static String convertDBpediaToWikipediaURL(String url) { String[] urlsParts = url.split("/"); return "https://en.wikipedia.org/wiki/" + urlsParts[urlsParts.length - 1]; } + + public static boolean isWikidataURI(String uri) { + return uri != null && (uri.startsWith("http://www.wikidata.org/entity/") || uri.startsWith("https://www.wikidata.org/entity/")); + } + + public static boolean isDBpediaURI(String uri) { + return uri != null && (uri.startsWith("http://dbpedia.org/resource/") || uri.startsWith("https://dbpedia.org/resource/")); + } + + /** + * Extracts the Wikidata entity ID (e.g. "Q42") from a Wikidata URI. 
+     */
+    public static String extractWikidataEntityId(String uri) {
+        if (uri == null) return null;
+        String[] parts = uri.split("/");
+        String id = parts[parts.length - 1];
+        if (id.matches("^[QPL]\\d+$")) {
+            return id;
+        }
+        return null;
+    }
 }
diff --git a/src/main/java/chatbot/lib/api/WikidataSPARQL.java b/src/main/java/chatbot/lib/api/WikidataSPARQL.java
new file mode 100644
index 0000000..b0bb11d
--- /dev/null
+++ b/src/main/java/chatbot/lib/api/WikidataSPARQL.java
@@ -0,0 +1,153 @@
+package chatbot.lib.api;
+
+import chatbot.lib.Constants;
+import chatbot.lib.Utility;
+import chatbot.lib.request.TemplateType;
+import chatbot.lib.response.ResponseData;
+import chatbot.lib.response.ResponseType;
+import org.apache.jena.query.*;
+import org.apache.jena.sparql.engine.http.QueryEngineHTTP;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.*;
+
+/**
+ * Service for querying the Wikidata SPARQL endpoint to retrieve entity
+ * information.
+ * Mirrors the interface of the DBpedia SPARQL class but targets Wikidata.
+ */
+public class WikidataSPARQL {
+    private static final Logger logger = LoggerFactory.getLogger(WikidataSPARQL.class);
+
+    private static final String ENDPOINT = "https://query.wikidata.org/sparql";
+    private static final String PREFIXES = "PREFIX wd: <http://www.wikidata.org/entity/>\n" +
+            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n" +
+            "PREFIX wikibase: <http://wikiba.se/ontology#>\n" +
+            "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" +
+            "PREFIX schema: <http://schema.org/>\n";
+
+    public WikidataSPARQL() {
+    }
+
+    public String buildQuery(String query) {
+        return PREFIXES + query;
+    }
+
+    /**
+     * Retrieves entity information from Wikidata for the given URI.
+     * Returns a ResponseData card with label, description, image, Wikipedia link,
+     * and Wikidata link.
+     */
+    public ResponseData getEntityInformation(String uri) {
+        String entityId = Utility.extractWikidataEntityId(uri);
+        if (entityId == null) {
+            return null;
+        }
+
+        String query = buildQuery(
+                "SELECT ?label ?description ?image ?articleEN WHERE {\n" +
+                        "  wd:" + entityId + " rdfs:label ?label . FILTER(lang(?label) = 'en') .\n" +
+                        "  OPTIONAL { wd:" + entityId +
+                        " schema:description ?description . FILTER(lang(?description) = 'en') }\n" +
+                        "  OPTIONAL { wd:" + entityId + " wdt:P18 ?image }\n" +
+                        "  OPTIONAL {\n" +
+                        "    ?articleEN schema:about wd:" + entityId + " ;\n" +
+                        "               schema:isPartOf <https://en.wikipedia.org/> .\n" +
+                        "  }\n" +
+                        "} LIMIT 1");
+
+        QueryExecution queryExecution = null;
+        ResponseData responseData = null;
+
+        try {
+            queryExecution = executeQuery(query);
+            Iterator<QuerySolution> results = queryExecution.execSelect();
+            if (results.hasNext()) {
+                QuerySolution result = results.next();
+                responseData = new ResponseData();
+
+                // Label
+                String label = result.get("label").asLiteral().getString();
+                responseData.setTitle(label);
+
+                // Description
+                if (result.get("description") != null) {
+                    responseData.setText(result.get("description").asLiteral().getString());
+                }
+
+                // Thumbnail / Image
+                if (result.get("image") != null) {
+                    responseData.setImage(result.get("image").toString());
+                }
+
+                // Wikipedia link
+                if (result.get("articleEN") != null) {
+                    responseData.addButton(new ResponseData.Button("View in Wikipedia", ResponseType.BUTTON_LINK,
+                            result.get("articleEN").toString()));
+                }
+
+                // Wikidata link
+                responseData.addButton(new ResponseData.Button("View in Wikidata", ResponseType.BUTTON_LINK, uri));
+
+                // Learn More button (reuses the existing template mechanism)
+                responseData.addButton(new ResponseData.Button("Learn More", ResponseType.BUTTON_PARAM,
+                        TemplateType.LEARN_MORE + Utility.STRING_SEPARATOR + uri + Utility.STRING_SEPARATOR + label));
+            }
+        } catch (Exception e) {
+            logger.error("Error querying Wikidata for entity: " + uri, e);
+        } finally {
+            if (queryExecution != null) {
+                queryExecution.close();
+            }
+        }
+        return responseData;
+    }
+
+    /**
+     * Retrieves the English label for a Wikidata entity.
+     */
+    public String getLabel(String uri) {
+        String entityId = Utility.extractWikidataEntityId(uri);
+        if (entityId == null)
+            return null;
+
+        String query = buildQuery(
+                "SELECT ?label WHERE {\n" +
+                        "  wd:" + entityId + " rdfs:label ?label . FILTER(lang(?label) = 'en') .\n" +
+                        "} LIMIT 1");
+
+        QueryExecution queryExecution = executeQuery(query);
+        String label = null;
+
+        try {
+            Iterator<QuerySolution> results = queryExecution.execSelect();
+            if (results.hasNext()) {
+                label = results.next().get("label").asLiteral().getString();
+            }
+        }
+        catch (Exception e) {
+            logger.error("Error querying Wikidata for label: " + uri, e);
+        }
+        finally {
+            if (queryExecution != null) {
+                queryExecution.close();
+            }
+        }
+        return label;
+    }
+
+    /**
+     * Executes a SPARQL query against the Wikidata endpoint.
+     */
+    public QueryExecution executeQuery(String queryString) {
+        logger.info("Wikidata SPARQL Query is:\n" + queryString);
+        Query query = QueryFactory.create(queryString);
+        QueryEngineHTTP queryEngine = (QueryEngineHTTP) QueryExecutionFactory.sparqlService(ENDPOINT, query);
+        queryEngine.addParam("timeout", String.valueOf(Constants.API_TIMEOUT));
+        // Wikidata requires a User-Agent header identifying the client
+        queryEngine.addDefaultHeader("User-Agent", "DBpediaChatbot/1.0 (https://github.com/dbpedia/chatbot)");
+        queryEngine.addParam("format", "json");
+        return queryEngine;
+    }
+}
diff --git a/src/main/java/chatbot/lib/api/qa/QANARY.java b/src/main/java/chatbot/lib/api/qa/QANARY.java
index 969cffb..7912ef2 100644
--- a/src/main/java/chatbot/lib/api/qa/QANARY.java
+++ b/src/main/java/chatbot/lib/api/qa/QANARY.java
@@ -22,9 +22,6 @@
 import java.util.List;
 import java.util.Map;
 
-/**
- * Created by ramgathreya on 7/1/17.
- */
 public class QANARY {
     private static final Logger logger = LoggerFactory.getLogger(QANARY.class);
     private static final String URL = "http://qanswer-core1.univ-st-etienne.fr/api/gerbil";
@@ -36,13 +33,12 @@ public QANARY() {
         this.client = HttpClientBuilder.create().setDefaultRequestConfig(requestConfig).build();
     }
 
-    private String makeRequest(String question) {
+    private String makeRequest(String question, String knowledgeBase) {
         try {
             HttpPost httpPost = new HttpPost(URL);
             List<NameValuePair> params = new ArrayList<>();
             params.add(new BasicNameValuePair("query", question));
-//            params.add(new BasicNameValuePair("lang", "it"));
-            params.add(new BasicNameValuePair("kb", "dbpedia"));
+            params.add(new BasicNameValuePair("kb", knowledgeBase));
             UrlEncodedFormEntity entity = new UrlEncodedFormEntity(params, Consts.UTF_8);
             httpPost.setEntity(entity);
@@ -50,36 +46,42 @@
             HttpResponse response = client.execute(httpPost);
 
             // Error Scenario
-            if(response.getStatusLine().getStatusCode() >= 400) {
-                logger.error("QANARY Server could not answer due to: " + response.getStatusLine());
+            if (response.getStatusLine().getStatusCode() >= 400) {
+                logger.error("QANARY Server could not answer for kb=" + knowledgeBase + " due to: "
+                        + response.getStatusLine());
                 return null;
             }
             return EntityUtils.toString(response.getEntity());
-        }
-        catch(Exception e) {
-            logger.error(e.getMessage());
+        } catch (Exception e) {
+            logger.error("QANARY request failed for kb=" + knowledgeBase + ": " + e.getMessage());
         }
         return null;
     }
 
-    // Calls QANARY Service then returns resulting data as a list of Data Objects
-    public QAService.Data search(String question) throws Exception {
+    private QAService.Data parseResponse(String response) throws Exception {
         QAService.Data data = new QAService.Data();
-        String response = makeRequest(question);
-        if(response != null) {
+        if (response != null) {
             ObjectMapper mapper = new ObjectMapper();
             JsonNode rootNode = mapper.readTree(response);
-            JsonNode answers = mapper.readTree(rootNode.findValue("questions").get(0).get("question").get("answers").getTextValue());
+            JsonNode questions = rootNode.findValue("questions");
+            if (questions == null || !questions.isArray() || questions.size() == 0) {
+                return data;
+            }
+            JsonNode questionNode = questions.get(0).get("question");
+            if (questionNode == null || questionNode.get("answers") == null) {
+                return data;
+            }
+            JsonNode answers = mapper.readTree(questionNode.get("answers").getTextValue());
             if (answers != null) {
                 JsonNode bindings = answers.get("results").get("bindings");
-                for(JsonNode binding : bindings) {
+                for (JsonNode binding : bindings) {
                     Iterator<Map.Entry<String, JsonNode>> nodes = binding.getFields();
                     while (nodes.hasNext()) {
                         Map.Entry<String, JsonNode> entry = nodes.next();
                         JsonNode value = entry.getValue();
-                        switch(value.get("type").getTextValue()) {
+                        switch (value.get("type").getTextValue()) {
                             case "uri":
                                 data.addURI(value.get("value").getTextValue());
                                 break;
@@ -94,4 +96,33 @@
         return data;
     }
 
+    public QAService.Data search(String question) throws Exception {
+
+        QAService.Data data = new QAService.Data();
+
+        // Query DBpedia KB
+        try {
+            QAService.Data dbpediaData = parseResponse(makeRequest(question, "dbpedia"));
+            data.addData(dbpediaData, false);
+        } catch (Exception e) {
+            logger.error("DBpedia QANARY query failed: " + e.getMessage());
+        }
+
+        // If DBpedia yielded an answer, return early so we don't pay
+        // the extra latency waiting for Wikidata.
+        if (!data.getUris().isEmpty() || !data.getLiterals().isEmpty()) {
+            return data;
+        }
+
+        // Query Wikidata KB
+        try {
+            QAService.Data wikidataData = parseResponse(makeRequest(question, "wikidata"));
+            data.addData(wikidataData, false);
+        } catch (Exception e) {
+            logger.error("Wikidata QANARY query failed, continuing with DBpedia results only: " + e.getMessage());
+        }
+
+        return data;
+    }
+
 }
diff --git a/src/main/java/chatbot/lib/handlers/NLHandler.java b/src/main/java/chatbot/lib/handlers/NLHandler.java
index ec225d1..c941566 100644
--- a/src/main/java/chatbot/lib/handlers/NLHandler.java
+++ b/src/main/java/chatbot/lib/handlers/NLHandler.java
@@ -2,6 +2,7 @@
 
 import chatbot.Application;
 import chatbot.lib.Utility;
+import chatbot.lib.api.WikidataSPARQL;
 import chatbot.lib.api.StatusCheckService;
 import chatbot.lib.api.qa.QAService;
 import chatbot.lib.api.SPARQL;
@@ -111,21 +112,33 @@ else if(uris.size() > 0) {
                         break;
                     }
 
-                    count = helper.getSparql().isDisambiguationPage(uri);
-                    processedResponse.setResponseType(SPARQL.ProcessedResponse.RESPONSE_CAROUSEL);
-
-                    // Not a disambiguation page
-                    if(count == 0) {
-                        ResponseData responseData = helper.getSparql().getEntityInformation(uri);
+                    // Route Wikidata URIs to WikidataSPARQL
+                    if(Utility.isWikidataURI(uri)) {
+                        processedResponse.setResponseType(SPARQL.ProcessedResponse.RESPONSE_CAROUSEL);
+                        WikidataSPARQL wikidataSparql = helper.getWikidataSparql();
+                        ResponseData responseData = wikidataSparql.getEntityInformation(uri);
                         if (responseData != null) {
                             processedResponse.addResponseData(responseData);
                         }
                     }
-                    // Disambiguation page
+                    // Route DBpedia URIs to DBpedia SPARQL (existing behavior)
                     else {
-                        processedResponse.getResponseInfo().setUri(uri).setCount(count).setQueryResultType(SPARQL.ResponseInfo.DISAMBIGUATION_PAGE).setOffset(0).setLimit(ResponseData.MAX_DATA_SIZE);
-                        processedResponse.setResponseData(helper.getSparql().getDisambiguatedEntities(uri, 0, ResponseData.MAX_DATA_SIZE));
-                        return processedResponse;
+                        count = helper.getSparql().isDisambiguationPage(uri);
+                        processedResponse.setResponseType(SPARQL.ProcessedResponse.RESPONSE_CAROUSEL);
+
+                        // Not a disambiguation page
+                        if(count == 0) {
+                            ResponseData responseData = helper.getSparql().getEntityInformation(uri);
+                            if (responseData != null) {
+                                processedResponse.addResponseData(responseData);
+                            }
+                        }
+                        // Disambiguation page
+                        else {
+                            processedResponse.getResponseInfo().setUri(uri).setCount(count).setQueryResultType(SPARQL.ResponseInfo.DISAMBIGUATION_PAGE).setOffset(0).setLimit(ResponseData.MAX_DATA_SIZE);
+                            processedResponse.setResponseData(helper.getSparql().getDisambiguatedEntities(uri, 0, ResponseData.MAX_DATA_SIZE));
+                            return processedResponse;
+                        }
                     }
                 }
             }
diff --git a/src/main/java/chatbot/lib/handlers/TemplateHandler.java b/src/main/java/chatbot/lib/handlers/TemplateHandler.java
index 48f25a5..1ba5338 100644
--- a/src/main/java/chatbot/lib/handlers/TemplateHandler.java
+++ b/src/main/java/chatbot/lib/handlers/TemplateHandler.java
@@ -3,6 +3,7 @@
 import chatbot.Application;
 import chatbot.lib.Constants;
 import chatbot.lib.Utility;
+import chatbot.lib.api.WikidataSPARQL;
 import chatbot.lib.handlers.dbpedia.StatusCheckHandler;
 import chatbot.lib.handlers.templates.dbpedia.*;
 import chatbot.lib.handlers.templates.OptionsTemplateHandler;
@@ -118,9 +119,16 @@ public ResponseGenerator handleTemplateMessage() {
 
             // Get Information for specific Entity
             case TemplateType.ENTITY_INFORMATION:
-                responseGenerator.addCarouselResponse(new ArrayList<ResponseData>(){{
-                    add(helper.getSparql().getEntityInformation(payload[1]));
-                }});
+                ResponseData entityInfo = Utility.isWikidataURI(payload[1])
+                        ? helper.getWikidataSparql().getEntityInformation(payload[1])
+                        : helper.getSparql().getEntityInformation(payload[1]);
+                if (entityInfo != null) {
+                    responseGenerator.addCarouselResponse(new ArrayList<ResponseData>() {{
+                        add(entityInfo);
+                    }});
+                } else {
+                    responseGenerator.setNoResultsResponse(request, helper.getRiveScriptBot());
+                }
                 break;
 
             case TemplateType.GET_LOCATION:
diff --git a/src/main/java/chatbot/lib/handlers/templates/OptionsTemplateHandler.java b/src/main/java/chatbot/lib/handlers/templates/OptionsTemplateHandler.java
index 4d6ee1a..517a981 100644
--- a/src/main/java/chatbot/lib/handlers/templates/OptionsTemplateHandler.java
+++ b/src/main/java/chatbot/lib/handlers/templates/OptionsTemplateHandler.java
@@ -84,6 +84,13 @@ private ResponseData getDefaultOptions(String uri, String label) {
 
     private ResponseGenerator getLearnMoreOptions(String uri, String label) {
         ResponseGenerator responseGenerator = new ResponseGenerator();
+
+        // Wikidata URIs cannot be looked up via DBpedia SPARQL for RDF types,
+        // so skip the TV/Movie-specific options and show defaults directly
+        if(Utility.isWikidataURI(uri)) {
+            return responseGenerator.addSmartReplyResponse(getDefaultOptions(uri, label));
+        }
+
         try {
             String types = helper.getSparql().getRDFTypes(uri);