From 2886ddc59b31f73198ec8448ff4cbfc3a93836a4 Mon Sep 17 00:00:00 2001 From: Katrina Prosise Date: Fri, 9 Jan 2026 06:26:07 -0500 Subject: [PATCH] Update scraper for better search ranking scores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ran the scraper (with the placeholder variables replaced) against a test index. Top results are more relevant when tested with common searches such as "update", "fioctl", and "aklite". This commit addresses FFTK-4435, "Improve search results…" Signed-off-by: Katrina Prosise --- scraper.json | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/scraper.json b/scraper.json index a43abb386..6e1821dc3 100644 --- a/scraper.json +++ b/scraper.json @@ -1,37 +1,37 @@ { - "index_uid": "UID", + "index_uid": "INDEX_VERSION", "start_urls": [ { - "url": "https://docs.foundries.io/latest/" + "url": "https://docs.foundries.io/RELEASE/getting-started/", + "page_rank": 1 + }, + { + "url": "https://docs.foundries.io/RELEASE/user-guide/", + "page_rank": 2 + }, + { + "url": "https://docs.foundries.io/RELEASE/reference-manual/", + "page_rank": 3 + }, + { + "url": "https://docs.foundries.io/RELEASE/appendix/", + "page_rank": 4 } ], "stop_urls": [], "selectors": { "lvl0": { - "selector": "title", - "global": true, + "selector": "h1", + "global": false, "default_value": "Documentation" }, - "lvl1": "h1", - "lvl2": "h2", - "lvl3": "h3", - "lvl4": "h4", - "lvl5": "h5", - "lvl6": "h6", + "lvl1": "h2", + "lvl2": "h3", + "lvl3": "h4", + "lvl4": "h5", + "lvl5": "h6", "text": "p" }, - "min_indexed_level": 2, - "scrap_start_urls": true, - "strip_chars": " .,;:#", - "custom_settings": { - "synonyms": { - "Linux microPlatform": [ - "LmP" - ], - "LmP": [ - "Linux microPlatform" - ] - } - } + "scrap_start_urls": false, + "strip_chars": " .,;:#" } -