diff --git a/src/containers/ResultCard.tsx b/src/containers/ResultCard.tsx
index 4e90082..0c2c836 100644
--- a/src/containers/ResultCard.tsx
+++ b/src/containers/ResultCard.tsx
@@ -122,6 +122,17 @@ const ResultCard = ({
EXACT MATCH
)}
+ {resultWithReplacedLink.providerQueryType ===
+ ProviderQueryType.EXACT_URL_TEXT && (
+ // data-iscapture="true" allow us to immediately dismiss tooltip on user scroll
+
+ DISCUSSION CONTAINS LINK
+
+ )}
+
{resultWithReplacedLink.subSourceName !== "" && (
{
@@ -192,7 +196,12 @@ const debugResults = (providerData: AllProviderResults | undefined) => {
// Split results into the different sources when under debug mode
const haveHnExactResults = hnResults[ProviderQueryType.EXACT_URL]?.length > 0;
const haveRedditExactResults =
+ redditResults[ProviderQueryType.EXACT_URL_TEXT]?.length > 0;
+ const haveHnExactUrlTextResults =
+ hnResults[ProviderQueryType.EXACT_URL_TEXT]?.length > 0;
+ const haveRedditExactUrlTextResults =
redditResults[ProviderQueryType.EXACT_URL]?.length > 0;
+
log.debug(
`Have HN exact: ${haveHnExactResults}, have Reddit exact: ${haveRedditExactResults}`
);
@@ -203,6 +212,8 @@ const debugResults = (providerData: AllProviderResults | undefined) => {
return {
haveHnExactResults,
haveRedditExactResults,
+ haveHnExactUrlTextResults,
+ haveRedditExactUrlTextResults,
haveHnTitleResults,
haveRedditTitleResults,
};
@@ -408,6 +419,8 @@ const Sidebar = () => {
const {
haveHnExactResults,
haveRedditExactResults,
+ haveHnExactUrlTextResults,
+ haveRedditExactUrlTextResults,
haveHnTitleResults,
haveRedditTitleResults,
} = debugResults(providerData);
@@ -648,6 +661,76 @@ const Sidebar = () => {
)}
+
+ {haveHnExactUrlTextResults || haveRedditExactUrlTextResults ? (
+
+
+ Results for{" "}
+
+ {`current URL in post text`}
+
+
+ {" "}
+ ({searchExactUrl}){" "}
+
+
+ {haveHnExactUrlTextResults && (
+
+
+
+
+ )}
+ {haveRedditExactUrlTextResults && (
+
+
+
+ )}
+
+ ) : (
+
+ No results for{" "}
+
+ {`current page title`}
+
+
+ {" "}
+ ({searchTitle}){" "}
+
+
+ )}
+
{haveHnTitleResults || haveRedditTitleResults ? (
diff --git a/src/providers/hackernews.ts b/src/providers/hackernews.ts
index 6b9cbeb..2580376 100644
--- a/src/providers/hackernews.ts
+++ b/src/providers/hackernews.ts
@@ -107,6 +107,46 @@ export class HnResultProvider implements ResultProvider {
};
}
+ async getExactUrlTextResults(url: string): Promise {
+ const encodedUrl = encodeURIComponent(url);
+ const queryString = `query=\"${encodedUrl}\"&tags=story&typoTolerance=false`;
+ const requestUrl = "https://hn.algolia.com/api/v1/search?" + queryString;
+ const res: HnJsonResult = await cachedApiCall(
+ requestUrl,
+ true,
+ CACHE_URL_DURATION_SEC
+ );
+ if (res.nbHits === 0) {
+ log.debug("Hacker News API: No urls found");
+ return {
+ providerName: ProviderType.HACKER_NEWS,
+ queryType: ProviderQueryType.EXACT_URL_TEXT,
+ results: [],
+ };
+ }
+ log.debug("HN Results Pre-translation:");
+ log.debug(res.hits);
+ const itemsAll =
+ res.hits?.map((hnHit) =>
+ translateHnToItem(
+ hnHit,
+ ProviderQueryType.EXACT_URL_TEXT,
+ url,
+ requestUrl
+ )
+ ) || [];
+ log.debug("Hacker News returned results for exact url text search:", {
+ response: res,
+ resultsWithoutDedup: itemsAll,
+ resultsTranslated: itemsAll,
+ });
+ return {
+ providerName: ProviderType.HACKER_NEWS,
+ queryType: ProviderQueryType.EXACT_URL_TEXT,
+ results: itemsAll,
+ };
+ }
+
// Main function to get all relevant results from HN
async getSiteUrlResults(url: string): Promise {
const encodedUrl = encodeURIComponent(url);
diff --git a/src/providers/providers.ts b/src/providers/providers.ts
index dd114b2..ca20d45 100644
--- a/src/providers/providers.ts
+++ b/src/providers/providers.ts
@@ -13,6 +13,7 @@ import { scoreResultsRelevance } from "./scoring";
// All providers must implement these two functions for search
export interface ResultProvider {
getExactUrlResults(url: string): Promise;
+ getExactUrlTextResults(url: string): Promise;
getSiteUrlResults(url: string): Promise;
getTitleResults(url: string, title: string): Promise;
getComments(url: string): Promise;
@@ -26,6 +27,7 @@ export enum ProviderType {
// To indicate inside the result structure, so we know where in the UI to place it
export enum ProviderQueryType {
EXACT_URL = "exact_url",
+ EXACT_URL_TEXT = "exact_url_text",
SITE_URL = "site_url",
TITLE = "title",
}
@@ -193,6 +195,7 @@ export async function fetchDataFromProviders(
const providerPromises: Promise[] = providers
.map((provider) => [
provider.getExactUrlResults(cleanedUrl),
+ provider.getExactUrlTextResults(cleanedUrl),
scoreResultsRelevance(
documentTitle,
provider.getTitleResults(cleanedUrl, documentTitle)
diff --git a/src/providers/reddit.ts b/src/providers/reddit.ts
index 04622f0..c4b8862 100644
--- a/src/providers/reddit.ts
+++ b/src/providers/reddit.ts
@@ -24,7 +24,7 @@ const cheerio = require("cheerio");
export class RedditResultProvider implements ResultProvider {
// Main function to get all relevant results from Reddit
async getExactUrlResults(url: string): Promise {
- const queryString = "sort=top&q=" + encodeURIComponent("url:" + url);
+ const queryString = "sort=top&q=" + encodeURIComponent('url:"' + url + '"');
const requestUrl = "https://old.reddit.com/search?" + queryString;
const data = await cachedApiCall(requestUrl, false, CACHE_URL_DURATION_SEC);
@@ -39,6 +39,7 @@ export class RedditResultProvider implements ResultProvider {
)
)
.toArray();
+ // Remove non-exact url matches
const itemsDeduped = itemsAll.filter(
(item) =>
(item.submittedUrl.endsWith(url) ||
@@ -62,6 +63,39 @@ export class RedditResultProvider implements ResultProvider {
};
}
+ // Main function to get all relevant results from Reddit
+ async getExactUrlTextResults(url: string): Promise {
+ const queryString = 'sort=relevance&q="' + encodeURIComponent(url) + '"';
+ const requestUrl = "https://old.reddit.com/search?" + queryString;
+ const data = await cachedApiCall(requestUrl, false, CACHE_URL_DURATION_SEC);
+
+ const $ = cheerio.load(data);
+ const itemsAll: ResultItem[] = $(".search-result.search-result-link")
+ .map((i: number, el: Element) =>
+ this.translateRedditToItem(
+ $(el).html(),
+ ProviderQueryType.EXACT_URL_TEXT,
+ url,
+ requestUrl
+ )
+ )
+ .toArray();
+
+ if (itemsAll.length === 0) {
+ return {
+ providerName: ProviderType.REDDIT,
+ queryType: ProviderQueryType.EXACT_URL_TEXT,
+ results: [],
+ };
+ }
+
+ return {
+ providerName: ProviderType.REDDIT,
+ queryType: ProviderQueryType.EXACT_URL_TEXT,
+ results: itemsAll,
+ };
+ }
+
async getSiteUrlResults(url: string): Promise {
const queryString = "sort=top&q=" + encodeURIComponent("site:" + url);
const requestUrl = "https://old.reddit.com/search?" + queryString;
@@ -220,7 +254,6 @@ export class RedditResultProvider implements ResultProvider {
providerRequestUrl: string
): ResultItem {
const $ = cheerio.load(html);
-
const url = $(".search-link").attr("href");
const commentsText = $(".search-comments").text();
const commentsLink = $(".search-comments").attr("href");
diff --git a/src/shared/constants.ts b/src/shared/constants.ts
index 0e93a47..39f6770 100644
--- a/src/shared/constants.ts
+++ b/src/shared/constants.ts
@@ -15,7 +15,7 @@ export const EVENTS_HOST =
export const ML_HOST =
"https://crowdwise-ml-jhhom.ondigitalocean.app/api/score_documents";
export const ML_API_KEY = "5b58147b-d869-465a-ab43-41c2ffc29ae0";
-export const ML_FILTER_THRESHOLD = -5.0;
+export const ML_FILTER_THRESHOLD = 0.0;
export const GITHUB_REPOSITORY_LINK =
"https://github.com/usecrowdwise/crowdwise";