-
-
Notifications
You must be signed in to change notification settings - Fork 126
feat(): map area filter #273
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ import { geocodeAddress } from './services/geocoding/geoCodingService.js'; | |
| import { distanceMeters } from './services/listings/distanceCalculator.js'; | ||
| import { getUserSettings } from './services/storage/settingsStorage.js'; | ||
| import { updateListingDistance } from './services/storage/listingsStorage.js'; | ||
| import booleanPointInPolygon from '@turf/boolean-point-in-polygon'; | ||
|
|
||
| /** | ||
| * @typedef {Object} Listing | ||
|
|
@@ -58,16 +59,17 @@ class FredyPipelineExecutioner { | |
| * @param {(raw:any)=>Listing} providerConfig.normalize Function to convert raw scraped data into a Listing shape. | ||
| * @param {(listing:Listing)=>boolean} providerConfig.filter Function to filter out unwanted listings. | ||
| * @param {(url:string, waitForSelector?:string)=>Promise<void>|Promise<Listing[]>} [providerConfig.getListings] Optional override to fetch listings. | ||
| * | ||
| * @param {Object} notificationConfig Notification configuration passed to notification adapters. | ||
| * @param {Object} spatialFilter Optional spatial filter configuration. | ||
| * @param {string} providerId The ID of the provider currently in use. | ||
| * @param {string} jobKey Key of the job that is currently running (from within the config). | ||
| * @param {SimilarityCache} similarityCache Cache instance for checking similar entries. | ||
| * @param browser | ||
| */ | ||
| constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache, browser) { | ||
| constructor(providerConfig, notificationConfig, spatialFilter, providerId, jobKey, similarityCache, browser) { | ||
| this._providerConfig = providerConfig; | ||
| this._notificationConfig = notificationConfig; | ||
| this._spatialFilter = spatialFilter; | ||
| this._providerId = providerId; | ||
| this._jobKey = jobKey; | ||
| this._similarityCache = similarityCache; | ||
|
|
@@ -87,6 +89,7 @@ class FredyPipelineExecutioner { | |
| .then(this._filter.bind(this)) | ||
| .then(this._findNew.bind(this)) | ||
| .then(this._geocode.bind(this)) | ||
| .then(this._filterByArea.bind(this)) | ||
| .then(this._save.bind(this)) | ||
| .then(this._calculateDistance.bind(this)) | ||
| .then(this._filterBySimilarListings.bind(this)) | ||
|
|
@@ -113,6 +116,38 @@ class FredyPipelineExecutioner { | |
| return newListings; | ||
| } | ||
|
|
||
| /** | ||
| * Filter listings by area using the configured spatial filter (`this._spatialFilter`), if one is set. | ||
| * Filtering only applies when the spatial filter contains at least one Polygon feature; listings without coordinates are always kept. | ||
| * | ||
| * @param {Listing[]} newListings New listings to filter by area. | ||
| * @returns {Promise<Listing[]>} Resolves with listings that are within the area (or not filtered if no area is set). | ||
| */ | ||
| _filterByArea(newListings) { | ||
| const polygonFeatures = this._spatialFilter?.features?.filter((f) => f.geometry?.type === 'Polygon'); | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As Codex said, this is a rather powerful feature, but you probably want to store the listing first and put the area filter just BEFORE the notification, so that the entry itself is stored but not forwarded. Otherwise it is going to be re-scraped all the time. |
||
|
|
||
| // If no area filter is set, return all listings | ||
| if (!polygonFeatures?.length) { | ||
| return newListings; | ||
| } | ||
|
|
||
| // Filter listings by area - keep only those within the polygon | ||
| const filteredListings = newListings.filter((listing) => { | ||
| // If listing doesn't have coordinates, keep it (don't filter out) | ||
| if (listing.latitude == null || listing.longitude == null) { | ||
| return true; | ||
| } | ||
|
|
||
| // Check if the point is inside the polygons | ||
| const point = [listing.longitude, listing.latitude]; // GeoJSON format: [lon, lat] | ||
| const isInPolygon = polygonFeatures.some((feature) => booleanPointInPolygon(point, feature)); | ||
|
|
||
| return isInPolygon; | ||
| }); | ||
|
|
||
| return filteredListings; | ||
| } | ||
|
|
||
| /** | ||
| * Fetch listings from the provider, using the default Extractor flow unless | ||
| * a provider-specific getListings override is supplied. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| /* | ||
| * Copyright (c) 2026 by Christian Kellner. | ||
| * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause | ||
| */ | ||
|
|
||
| // Migration: Add spatial_filter column to jobs table for storing GeoJSON-based spatial filters | ||
| export function up(db) { | ||
| db.exec(` | ||
| ALTER TABLE jobs ADD COLUMN spatial_filter JSONB DEFAULT NULL; | ||
| `); | ||
| } | ||
|
|
||
| export function down(db) { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A down function is not needed. |
||
| db.exec(` | ||
| ALTER TABLE jobs DROP COLUMN spatial_filter; | ||
| `); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Running `_filterByArea` before `_save` means listings outside the polygon are never persisted, so `_findNew` cannot mark them as seen on later runs. In jobs with an active spatial filter, the same out-of-area listings are reprocessed and re-geocoded on every execution, which can repeatedly hit Nominatim and degrade run time until rate limiting kicks in. Persisting hash/dedup information before area filtering (or otherwise recording filtered-out IDs) avoids this repeated external work.
Useful? React with 👍 / 👎.