diff --git a/lib/FredyPipelineExecutioner.js b/lib/FredyPipelineExecutioner.js index 184d897a..e246dc99 100755 --- a/lib/FredyPipelineExecutioner.js +++ b/lib/FredyPipelineExecutioner.js @@ -63,13 +63,15 @@ class FredyPipelineExecutioner { * @param {string} providerId The ID of the provider currently in use. * @param {string} jobKey Key of the job that is currently running (from within the config). * @param {SimilarityCache} similarityCache Cache instance for checking similar entries. + * @param {Object} [globalSettings] Global application settings (e.g., Bright Data credentials). */ - constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache) { + constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache, globalSettings) { this._providerConfig = providerConfig; this._notificationConfig = notificationConfig; this._providerId = providerId; this._jobKey = jobKey; this._similarityCache = similarityCache; + this._globalSettings = globalSettings || {}; } /** diff --git a/lib/provider/einsAImmobilien.js b/lib/provider/einsAImmobilien.js index 0c9a5920..34c460ef 100755 --- a/lib/provider/einsAImmobilien.js +++ b/lib/provider/einsAImmobilien.js @@ -66,5 +66,6 @@ export const metaInformation = { name: '1a Immobilien', baseUrl: 'https://www.1a-immobilienmarkt.de/', id: 'einsAImmobilien', + currency: 'EUR', }; export { config }; diff --git a/lib/provider/flatfox.js b/lib/provider/flatfox.js new file mode 100644 index 00000000..19f24be5 --- /dev/null +++ b/lib/provider/flatfox.js @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/* + * Flatfox.ch provider for Fredy + * Swiss real estate platform owned by SMG (free for landlords) + * + * Uses Flatfox public API instead of HTML scraping for reliability. 
const BRIGHT_DATA_API_URL = 'https://api.brightdata.com/request';

// Upper bound for one request, including reading the response body.
// Same 120 s budget the ImmoScout24.ch provider applies to its Bright Data calls.
const FETCH_TIMEOUT_MS = 120_000;

let appliedBlackList = [];

/**
 * Fetch a URL and return its body parsed as JSON.
 *
 * When both `brightDataApiToken` and `brightDataZone` are present in the settings, the
 * request is sent through the Bright Data request API (original author's note: this is
 * done to obtain a Swiss IP). Otherwise the URL is fetched directly.
 *
 * The whole exchange (headers and body) is aborted after FETCH_TIMEOUT_MS so a stalled
 * connection cannot block the job forever.
 *
 * @param {string} url - Target URL to fetch.
 * @param {Object} [globalSettings] - Global settings; reads `brightDataApiToken` and `brightDataZone`.
 * @returns {Promise<*>} Parsed JSON payload.
 * @throws {Error} On a non-2xx response, on invalid JSON, or when the request is aborted by the timeout.
 */
async function fetchJson(url, globalSettings = {}) {
  const { brightDataApiToken: apiToken, brightDataZone: zone } = globalSettings ?? {};

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    if (apiToken && zone) {
      logger.debug(`Flatfox: Fetching via Bright Data: ${url}`);
      const response = await fetch(BRIGHT_DATA_API_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${apiToken}`,
        },
        body: JSON.stringify({ zone, url, format: 'raw' }),
        signal: controller.signal,
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Bright Data API error (${response.status}): ${errorText}`);
      }

      // `format: 'raw'` hands back the upstream body as text; parse it ourselves.
      return JSON.parse(await response.text());
    }

    logger.debug(`Flatfox: Direct fetch (no Bright Data): ${url}`);
    const response = await fetch(url, { signal: controller.signal });
    if (!response.ok) {
      throw new Error(`HTTP error: ${response.status}`);
    }
    // `return await` keeps the timer armed until the body has been read.
    return await response.json();
  } finally {
    clearTimeout(timeoutId);
  }
}

/**
 * Turn the query string of a user-provided search URL into a plain object.
 * When a key occurs more than once, the last value wins.
 *
 * Example URL:
 * https://www.flatfox.ch/en/search/?east=7.533549&north=47.024424&object_category=APARTMENT&offer_type=RENT&south=46.909588&west=7.318974
 *
 * @param {string} url - Absolute search URL.
 * @returns {Object<string, string>} Query parameters keyed by name.
 * @throws {TypeError} When `url` is not a valid absolute URL.
 */
function parseSearchUrl(url) {
  return Object.fromEntries(new URL(url).searchParams);
}
const PIN_API_URL = 'https://flatfox.ch/api/v1/pin/';

// Result cap sent with every pin request (original author's note: the website uses 400).
const PIN_MAX_COUNT = '400';

// Search parameters copied verbatim from the user's URL to the pin API, in request order:
// geo bounds, category/offer filters, rooms, price, then additional filters.
const FORWARDED_PIN_PARAMS = [
  'east',
  'west',
  'north',
  'south',
  'object_category',
  'offer_type',
  'min_rooms',
  'max_rooms',
  'min_price',
  'max_price',
  'attribute',
  'moving_date_from',
  'is_swap',
  'ordering',
];

/**
 * Build the pin API URL for a set of parsed search parameters.
 * Only allow-listed, non-empty parameters are forwarded.
 *
 * @param {Object<string, string>} params - Parsed search parameters.
 * @returns {string} Absolute pin API URL.
 */
function buildPinApiUrl(params) {
  const apiUrl = new URL(PIN_API_URL);
  for (const key of FORWARDED_PIN_PARAMS) {
    if (params[key]) {
      apiUrl.searchParams.set(key, params[key]);
    }
  }
  apiUrl.searchParams.set('max_count', PIN_MAX_COUNT);
  return apiUrl.toString();
}

/**
 * Fetch listing IDs from the pin API (map markers).
 *
 * @param {Object<string, string>} params - Parsed search parameters.
 * @param {Object} [globalSettings] - Passed through to `fetchJson`.
 * @returns {Promise<Array>} The `pk` of every returned pin; pins without a `pk` are skipped.
 * @throws {Error} When the request fails or the response is not an array.
 */
async function fetchListingIds(params, globalSettings) {
  const pins = await fetchJson(buildPinApiUrl(params), globalSettings);

  if (!Array.isArray(pins)) {
    // Fail with a readable message instead of "pins.map is not a function".
    const received = pins === null ? 'null' : typeof pins;
    throw new Error(`Flatfox: Unexpected pin API response (expected an array, got ${received})`);
  }

  return pins.map((pin) => pin?.pk).filter((pk) => pk != null);
}
/**
 * Filter listings by the rooms/price bounds of the user's search.
 * The bounds are also forwarded to the pin API; this is a client-side backup
 * (original author's note: the pin API does not always respect them).
 *
 * A listing without `number_of_rooms` or `price_display` is treated as 0 for that field,
 * so it is dropped whenever the matching minimum is above 0.
 * A bound that cannot be parsed as a number (NaN) does not filter anything.
 *
 * @param {Array<Object>} listings - Listing objects from the public-listing API.
 * @param {Object<string, string>} params - Parsed search parameters (`min_rooms`, `max_rooms`, `min_price`, `max_price`).
 * @returns {Array<Object>} Listings within all bounds.
 */
function filterListings(listings, params) {
  const parseBound = (raw, fallback) => Number.parseFloat(raw || fallback);
  const minRooms = parseBound(params.min_rooms, 0);
  const maxRooms = parseBound(params.max_rooms, Infinity);
  const minPrice = parseBound(params.min_price, 0);
  const maxPrice = parseBound(params.max_price, Infinity);

  return listings.filter((item) => {
    const rooms = item.number_of_rooms || 0;
    const price = item.price_display || 0;

    // Kept as "reject when outside" so that NaN bounds never reject.
    if (rooms < minRooms || rooms > maxRooms) return false;
    if (price < minPrice || price > maxPrice) return false;
    return true;
  });
}

/**
 * Fetch full listing details from the public-listing API.
 *
 * @param {Array} pks - Listing primary keys to look up.
 * @param {Object} [globalSettings] - Passed through to `fetchJson`.
 * @returns {Promise<Array<Object>>} Listing objects; empty when `pks` is empty or the payload has no usable list.
 */
async function fetchListingDetails(pks, globalSettings) {
  if (pks.length === 0) return [];

  const apiUrl = new URL('https://flatfox.ch/api/v1/public-listing/');
  apiUrl.searchParams.set('expand', 'cover_image');
  // Original author's note: limit=0 means "no pagination limit" — not verified here.
  apiUrl.searchParams.set('limit', '0');
  for (const pk of pks) {
    apiUrl.searchParams.append('pk', pk);
  }

  const data = await fetchJson(apiUrl.toString(), globalSettings);

  // The API is expected to return a bare array; also accept a paginated `{ results: [...] }` shape.
  if (Array.isArray(data)) return data;
  return Array.isArray(data?.results) ? data.results : [];
}

/**
 * Map a public-listing API item to the Fredy listing format.
 * Missing fields become empty strings. A missing `url` yields the site root
 * rather than a link ending in "undefined".
 *
 * @param {Object} item - Listing object from the public-listing API.
 * @returns {{id: string, price: string, size: string, title: string, link: string, description: string, address: string, image: string}}
 */
function mapApiToListing(item) {
  const price = item.price_display ? `${item.price_display.toLocaleString('de-CH')} CHF` : '';

  const roomsLabel = item.number_of_rooms ? `${item.number_of_rooms} rooms` : '';
  const livingSpaceLabel = item.surface_living ? `${item.surface_living} m²` : '';
  const size = [roomsLabel, livingSpaceLabel].filter(Boolean).join(', ');

  const imagePath = item.cover_image?.url_listing_search;

  return {
    id: String(item.pk),
    price,
    size,
    title: item.short_title || item.pitch_title || '',
    link: `https://www.flatfox.ch${item.url || ''}`,
    description: item.description_title || item.description?.substring(0, 200) || '',
    address: item.public_address || '',
    image: imagePath ? `https://flatfox.ch${imagePath}` : '',
  };
}

/**
 * Custom getListings implementation backed by the Flatfox API.
 * Called with `this` bound to FredyPipelineExecutioner; reads `this._globalSettings`.
 *
 * Steps: parse the search URL, fetch listing IDs from the pin API, fetch their details,
 * re-apply the rooms/price bounds client-side, then map to the Fredy format.
 * Any error is logged and results in an empty list.
 *
 * @param {string} url - User-provided Flatfox search URL.
 * @returns {Promise<Array<Object>>} Listings in Fredy format.
 */
async function getListings(url) {
  const globalSettings = this._globalSettings || {};

  try {
    const params = parseSearchUrl(url);
    logger.info(
      `Flatfox: Searching with params: ${JSON.stringify({ min_rooms: params.min_rooms, max_price: params.max_price, moving_date_from: params.moving_date_from })}`,
    );

    const pks = await fetchListingIds(params, globalSettings);
    logger.info(`Flatfox: Pin API returned ${pks.length} PKs`);

    if (pks.length === 0) {
      logger.info('Flatfox: Found 0 listings (no PKs from pin API)');
      return [];
    }

    const listings = await fetchListingDetails(pks, globalSettings);
    logger.info(`Flatfox: Details API returned ${listings.length} listings`);

    // Backup filter: the bounds were already sent to the pin API, but are enforced again here.
    const filtered = filterListings(listings, params);
    logger.info(`Flatfox: After price/rooms filter: ${filtered.length} listings`);

    const result = filtered.map(mapApiToListing);
    logger.info(`Flatfox: Found ${result.length} listings`);
    return result;
  } catch (error) {
    logger.error('Flatfox API error:', error);
    return [];
  }
}
/**
 * Replace the listing's raw id (the API pk) with a hash of pk and price.
 * Original author's note: including the price is meant for dedup.
 * Mutates and returns the object passed in.
 */
function normalize(o) {
  o.id = buildHash(o.id, o.price);
  return o;
}

/**
 * Keep a listing only when it has a title and neither its title nor its
 * description is matched by `isOneOf` against the active blacklist.
 */
function applyBlacklist(o) {
  const titleHit = isOneOf(o.title, appliedBlackList);
  const descriptionHit = isOneOf(o.description, appliedBlackList);
  return o.title != null && !(titleHit || descriptionHit);
}

// Original author's note: the crawl* / selector fields are not used when
// getListings is provided and are kept for compatibility.
const unusedCrawlFields = {
  id: '',
  price: '',
  size: '',
  title: '',
  link: '',
  description: '',
  address: '',
  image: '',
};

const config = {
  url: null,
  crawlContainer: null,
  sortByDateParam: null,
  waitForSelector: null,
  crawlFields: unusedCrawlFields,
  normalize,
  filter: applyBlacklist,
  getListings,
};

/**
 * Apply the per-job source configuration and remember the blacklist terms.
 */
const init = (sourceConfig, blacklist) => {
  const { enabled, url } = sourceConfig;
  config.enabled = enabled;
  config.url = url;
  appliedBlackList = blacklist ? blacklist : [];
};

const metaInformation = {
  name: 'Flatfox',
  baseUrl: 'https://flatfox.ch/',
  id: 'flatfox',
  currency: 'CHF',
};

export { init, metaInformation, config };
/**
 * ImmoScout24.ch provider for Fredy
 *
 * IMPORTANT: ImmoScout24.ch (SMG Swiss Marketplace Group) is a completely
 * separate company from ImmoScout24.de (Scout24 SE). They share no code,
 * APIs, or infrastructure.
 *
 * Listings are read from the window.__INITIAL_STATE__ JSON embedded in the page,
 * which the page HTML is fetched for via Bright Data Web Unlocker.
 */

const BRIGHT_DATA_API_URL = 'https://api.brightdata.com/request';
const BRIGHT_DATA_TIMEOUT_MS = 120_000; // 120 seconds - Bright Data scraping can be slow
const DEFAULT_MAX_PAGES = 5; // Limit pages to control Bright Data costs
const PAGE_DELAY_MS = 2000; // Delay between page requests to avoid rate limiting

let appliedBlackList = [];

/**
 * Pull the __INITIAL_STATE__ object out of a page's HTML via `extractEmbeddedJson`.
 * Logs a warning when nothing is found.
 *
 * @param {string} html - Raw HTML content
 * @returns {Object|null} Whatever `extractEmbeddedJson` returned
 */
function extractInitialStateFromHtml(html) {
  const state = extractEmbeddedJson(html, '__INITIAL_STATE__');
  if (state) {
    return state;
  }
  logger.warn('ImmoScout24.ch: Could not find __INITIAL_STATE__ in HTML');
  return state;
}

/**
 * Read the listings array at resultList.search.fullSearch.result.listings.
 * Logs a warning and returns an empty array when that path holds no array.
 *
 * @param {Object} initialState - Parsed __INITIAL_STATE__
 * @returns {Array} Raw listing items
 */
function extractListingsFromState(initialState) {
  const candidate = initialState?.resultList?.search?.fullSearch?.result?.listings;
  if (Array.isArray(candidate)) {
    return candidate;
  }
  logger.warn('ImmoScout24.ch: No listings array found in __INITIAL_STATE__');
  return [];
}

/**
 * Read pagination data from resultList.search.fullSearch.result.
 * Falls back to page 1 of 1 with a count of 0 for missing (or falsy) values.
 *
 * @param {Object} initialState - Parsed __INITIAL_STATE__
 * @returns {Object} Pagination info { currentPage, totalPages, totalCount }
 */
function extractPaginationInfo(initialState) {
  const searchResult = initialState?.resultList?.search?.fullSearch?.result;
  const { page, pageCount } = searchResult?.paging || {};

  return {
    currentPage: page || 1,
    totalPages: pageCount || 1,
    totalCount: searchResult?.resultCount || 0,
  };
}

/**
 * Return the search URL with its `pn` (page number) query parameter set.
 *
 * @param {string} baseUrl - Original search URL
 * @param {number} pageNum - Page number (1-indexed)
 * @returns {string} URL with pagination parameter
 */
function buildPageUrl(baseUrl, pageNum) {
  const pageUrl = new URL(baseUrl);
  pageUrl.searchParams.set('pn', String(pageNum));
  return pageUrl.href;
}
/**
 * Map property categories to URL-friendly slugs.
 * Categories from __INITIAL_STATE__ like ["APARTMENT", "FLAT"] map to URL segments.
 */
const PROPERTY_TYPE_MAP = Object.freeze({
  APARTMENT: 'flat',
  FLAT: 'flat',
  HOUSE: 'house',
  VILLA: 'house',
  CHALET: 'house',
  FARMHOUSE: 'house',
  STUDIO: 'flat',
  LOFT: 'flat',
  ATTIC: 'flat',
  DUPLEX: 'flat',
  PARKING: 'parking',
  GARAGE: 'parking',
  COMMERCIAL: 'commercial',
  OFFICE: 'commercial',
  RETAIL: 'commercial',
  INDUSTRIAL: 'commercial',
  GASTRONOMY: 'commercial',
  PLOT: 'plot',
  LAND: 'plot',
});

// German letters that transliterate to two ASCII letters instead of losing their accent.
const GERMAN_TRANSLITERATIONS = Object.freeze({ ä: 'ae', ö: 'oe', ü: 'ue', ß: 'ss' });

/**
 * Get URL-friendly property type from categories array.
 * The first category with a known mapping wins; matching is case-insensitive.
 * Entries that are not strings are skipped.
 *
 * @param {string[]} categories - Array of category strings from listing
 * @returns {string} URL slug for property type, or 'property' when nothing matches
 */
export function getPropertyTypeSlug(categories) {
  if (!Array.isArray(categories)) {
    return 'property';
  }
  for (const category of categories) {
    if (typeof category !== 'string') continue;
    const slug = PROPERTY_TYPE_MAP[category.toUpperCase()];
    if (slug) return slug;
  }
  return 'property';
}

/**
 * Sanitize locality string for use in URLs.
 *
 * German umlauts and ß are transliterated (ä → ae, ö → oe, ü → ue, ß → ss). Every other
 * accented letter loses its diacritic. Remaining non-alphanumeric runs become single
 * dashes, and leading/trailing dashes are removed.
 *
 * @param {string} locality - Raw locality string (e.g., "Zurich HB", "St. Gallen")
 * @returns {string} URL-safe locality slug; 'switzerland' for empty or non-string input
 */
export function sanitizeLocality(locality) {
  if (!locality || typeof locality !== 'string') {
    return 'switzerland';
  }

  const slug = locality
    // Compose first so "u" + combining diaeresis is recognised as "ü".
    .normalize('NFC')
    .toLowerCase()
    .replace(/[äöüß]/g, (char) => GERMAN_TRANSLITERATIONS[char])
    // Decompose the rest and drop the combining marks (é → e, â → a, ç → c, ...).
    .normalize('NFD')
    .replace(/\p{M}+/gu, '')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');

  return slug || 'switzerland';
}

/**
 * Build price string from listing prices object.
 * Prefers the gross rent (suffixed with "/month" for a MONTH interval), then the buy price.
 *
 * @param {Object} prices - `prices` object of a listing
 * @returns {string} e.g. "CHF 2100/month", or '' when no price is present
 */
function buildPriceString(prices) {
  const rent = prices?.rent;
  if (rent?.gross) {
    const suffix = rent.interval === 'MONTH' ? '/month' : '';
    return `CHF ${rent.gross}${suffix}`;
  }
  if (prices?.buy?.price) {
    return `CHF ${prices.buy.price}`;
  }
  return '';
}

/**
 * Build size string from listing characteristics.
 *
 * @param {Object} characteristics - `characteristics` object of a listing
 * @returns {string} e.g. "3.5 rooms, 80 m²"
 */
function buildSizeString(characteristics) {
  const parts = [];
  if (characteristics?.numberOfRooms) {
    parts.push(`${characteristics.numberOfRooms} rooms`);
  }
  if (characteristics?.livingSpace) {
    parts.push(`${characteristics.livingSpace} m²`);
  }
  return parts.join(', ');
}

/**
 * Build address string from listing address object.
 *
 * @param {Object} address - `address` object of a listing
 * @returns {string} e.g. "Main 1, 3000 Bern"
 */
function buildAddressString(address) {
  const parts = [];
  if (address?.street) {
    parts.push(address.street);
  }
  if (address?.postalCode || address?.locality) {
    parts.push([address.postalCode, address.locality].filter(Boolean).join(' '));
  }
  return parts.join(', ');
}

/**
 * Transform a listing from __INITIAL_STATE__ format to Fredy format.
 * Localized text and attachments are taken from the listing's primary language,
 * falling back to German and then to empty values.
 *
 * @param {Object} item - Raw item from the listings array
 * @returns {Object} Listing with id, title, description, price, size, address, link, image
 */
function transformListing(item) {
  const listing = item?.listing || {};
  const localization = listing.localization || {};
  const primaryLang = localization.primary || 'de';
  const localizedData = localization[primaryLang] || localization.de || {};
  const text = localizedData.text || {};
  const attachments = localizedData.attachments || [];

  const listingId = listing.id || item?.id;
  // Anything that is not explicitly BUY is linked as a rental.
  const offerType = listing.offerType === 'BUY' ? 'buy' : 'rent';
  const imageAttachment = attachments.find((attachment) => attachment.type === 'IMAGE');

  return {
    id: listingId,
    title: text.title || '',
    description: text.description || '',
    price: buildPriceString(listing.prices || {}),
    size: buildSizeString(listing.characteristics || {}),
    address: buildAddressString(listing.address || {}),
    link: `https://www.immoscout24.ch/${offerType}/${listingId}`,
    image: imageAttachment?.url || '',
  };
}
/**
 * Normalize a listing object (called by pipeline after getListings).
 * Replaces the id with a hash of id and price, trims title and address, and substitutes
 * placeholders when `nullOrEmpty` reports them as missing. Mutates and returns `o`.
 */
function normalize(o) {
  const id = buildHash(o.id, o.price);
  const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.trim();
  const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : o.address.trim();

  return Object.assign(o, { id, title, address });
}

/**
 * Apply blacklist filter: keep a listing only when neither its title nor its
 * description is matched by `isOneOf` against the active blacklist.
 */
function applyBlacklist(o) {
  const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
  const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
  return titleNotBlacklisted && descNotBlacklisted;
}

/**
 * Fetch a single page of listings using Bright Data Web Unlocker.
 *
 * The abort timer covers the whole exchange, including reading the response body, and is
 * always cleared before returning. Every failure is logged and reported as `null`.
 *
 * @param {string} url The URL to fetch
 * @param {string} apiToken Bright Data API token
 * @param {string} zone Bright Data zone
 * @returns {Promise<{listings: Array, pagination: Object}|null>} Listings and pagination info, or null on error
 */
async function fetchPage(url, apiToken, zone) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), BRIGHT_DATA_TIMEOUT_MS);

  try {
    logger.debug(`ImmoScout24.ch: Fetching page via Bright Data from ${url}`);

    const response = await fetch(BRIGHT_DATA_API_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiToken}`,
      },
      body: JSON.stringify({ zone, url, format: 'raw' }),
      signal: controller.signal,
    });

    if (!response.ok) {
      const errorText = await response.text();
      logger.error(`ImmoScout24.ch: Bright Data API error (${response.status}): ${errorText}`);
      return null;
    }

    // Still inside the timeout window: a stalled body download is aborted as well.
    const html = await response.text();
    logger.debug(`ImmoScout24.ch: Received ${html.length} bytes from Bright Data`);

    const initialState = extractInitialStateFromHtml(html);
    if (!initialState) {
      logger.warn('ImmoScout24.ch: Could not extract __INITIAL_STATE__ from response');
      return null;
    }

    return {
      listings: extractListingsFromState(initialState),
      pagination: extractPaginationInfo(initialState),
    };
  } catch (error) {
    if (error.name === 'AbortError') {
      logger.error(`ImmoScout24.ch: Request timed out after ${BRIGHT_DATA_TIMEOUT_MS / 1000}s`);
    } else {
      logger.error('ImmoScout24.ch: Error fetching page:', error);
    }
    return null;
  } finally {
    clearTimeout(timeoutId);
  }
}

/**
 * Fetch listings using Bright Data Web Unlocker with pagination support.
 * Called with `this` bound to FredyPipelineExecutioner; reads `this._globalSettings`
 * (`brightDataApiToken`, `brightDataZone`, optional `immoscout24MaxPages`).
 *
 * Fetches page 1, then pages 2..min(totalPages, maxPages) with a pause between requests.
 * Pagination stops early when a page fails or comes back empty. Listings collected up to
 * that point are still returned.
 *
 * @param {string} url The URL to fetch listings from
 * @returns {Promise<Array>} Array of listing objects from all fetched pages
 */
async function getListings(url) {
  const globalSettings = this._globalSettings || {};
  const { brightDataApiToken: apiToken, brightDataZone: zone } = globalSettings;

  // Accept numbers and numeric strings; anything unusable falls back to the default.
  const configuredMaxPages = Number.parseInt(globalSettings.immoscout24MaxPages, 10);
  const maxPages = configuredMaxPages > 0 ? configuredMaxPages : DEFAULT_MAX_PAGES;

  if (!apiToken || !zone) {
    logger.warn('ImmoScout24.ch: Bright Data API token or zone not configured. Please add them in Settings.');
    return [];
  }

  // The first page also tells us how many pages exist.
  const firstPage = await fetchPage(url, apiToken, zone);
  if (!firstPage) {
    return [];
  }

  const allListings = [...firstPage.listings];
  const { totalPages, totalCount } = firstPage.pagination;

  logger.info(
    `ImmoScout24.ch: Page 1/${totalPages} - found ${firstPage.listings.length} listings (${totalCount} total)`,
  );

  const pagesToFetch = Math.min(totalPages, maxPages);
  let pagesFetched = 1;

  for (let pageNum = 2; pageNum <= pagesToFetch; pageNum++) {
    // Rate limit: wait between page requests
    await sleep(PAGE_DELAY_MS);

    const page = await fetchPage(buildPageUrl(url, pageNum), apiToken, zone);

    if (!page) {
      logger.warn(`ImmoScout24.ch: Failed to fetch page ${pageNum}, stopping pagination`);
      break;
    }
    if (page.listings.length === 0) {
      logger.info(`ImmoScout24.ch: No more listings found at page ${pageNum}, stopping pagination`);
      break;
    }

    allListings.push(...page.listings);
    pagesFetched = pageNum;
    logger.info(`ImmoScout24.ch: Page ${pageNum}/${pagesToFetch} - found ${page.listings.length} listings`);
  }

  // Report the pages actually read, not the number that was planned.
  logger.info(`ImmoScout24.ch: Total ${allListings.length} listings from ${pagesFetched} page(s)`);

  return allListings.map((item) => transformListing(item));
}

const config = {
  url: null,
  // Original author's note: these are kept for compatibility but not used when getListings is defined
  crawlContainer: '[data-test^="result-list-item"]',
  sortByDateParam: 'sorting=dateCreated-desc',
  waitForSelector: null,
  proxyRequired: false, // Original author's note: Bright Data handles everything internally
  crawlFields: {
    id: 'id',
    price: 'price',
    size: 'size',
    title: 'title',
    link: 'link',
    description: 'description',
    address: 'address',
    image: 'image',
  },
  // Custom extraction using Bright Data Web Unlocker
  getListings: getListings,
  normalize: normalize,
  filter: applyBlacklist,
};

/**
 * Apply the per-job source configuration and remember the blacklist terms.
 */
export const init = (sourceConfig, blacklist) => {
  config.enabled = sourceConfig.enabled;
  config.url = sourceConfig.url;
  appliedBlackList = blacklist || [];
};

export const metaInformation = {
  name: 'ImmoScout24.ch',
  baseUrl: 'https://www.immoscout24.ch/',
  id: 'immoscout24ch',
  currency: 'CHF',
};

export { config };
currency: 'EUR', }; export { config }; diff --git a/lib/provider/ohneMakler.js b/lib/provider/ohneMakler.js index ca90363d..45e399ff 100755 --- a/lib/provider/ohneMakler.js +++ b/lib/provider/ohneMakler.js @@ -46,5 +46,6 @@ export const metaInformation = { name: 'OhneMakler', baseUrl: 'https://www.ohne-makler.net', id: 'ohneMakler', + currency: 'EUR', }; export { config }; diff --git a/lib/provider/regionalimmobilien24.js b/lib/provider/regionalimmobilien24.js index baaae6fa..732ff349 100755 --- a/lib/provider/regionalimmobilien24.js +++ b/lib/provider/regionalimmobilien24.js @@ -50,5 +50,6 @@ export const metaInformation = { name: 'Regionalimmobilien24', baseUrl: 'https://www.regionalimmobilien24.de/', id: 'regionalimmobilien24', + currency: 'EUR', }; export { config }; diff --git a/lib/provider/sparkasse.js b/lib/provider/sparkasse.js index bc5aa2a5..b309275a 100755 --- a/lib/provider/sparkasse.js +++ b/lib/provider/sparkasse.js @@ -47,5 +47,6 @@ export const metaInformation = { name: 'Sparkasse Immobilien', baseUrl: 'https://immobilien.sparkasse.de/', id: 'sparkasse', + currency: 'EUR', }; export { config }; diff --git a/lib/provider/wgGesucht.js b/lib/provider/wgGesucht.js index d0d05519..396b7ae3 100755 --- a/lib/provider/wgGesucht.js +++ b/lib/provider/wgGesucht.js @@ -48,5 +48,6 @@ export const metaInformation = { name: 'Wg gesucht', baseUrl: 'https://www.wg-gesucht.de/', id: 'wgGesucht', + currency: 'EUR', }; export { config }; diff --git a/lib/provider/wohnungsboerse.js b/lib/provider/wohnungsboerse.js index f6c37c4e..50d0f715 100644 --- a/lib/provider/wohnungsboerse.js +++ b/lib/provider/wohnungsboerse.js @@ -53,6 +53,7 @@ export const metaInformation = { name: 'Wohnungsboerse', baseUrl: 'https://www.wohnungsboerse.net', id: 'wohnungsboerse', + currency: 'EUR', }; export { config }; diff --git a/lib/services/jobs/jobExecutionService.js b/lib/services/jobs/jobExecutionService.js index 34b05a78..ffc863e8 100644 --- a/lib/services/jobs/jobExecutionService.js 
/**
 * Find the end of the JSON object that starts at `startIndex` by counting braces,
 * ignoring braces that appear inside double-quoted strings.
 *
 * @param {string} text - Text containing the object
 * @param {number} startIndex - Index of the opening `{`
 * @returns {number} Index just past the matching `}`, or -1 when the object never closes
 */
function findJsonObjectEnd(text, startIndex) {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = startIndex; i < text.length; i++) {
    const char = text[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (char === '\\') {
        escaped = true;
      } else if (char === '"') {
        inString = false;
      }
      continue;
    }

    if (char === '"') {
      inString = true;
    } else if (char === '{') {
      depth++;
    } else if (char === '}') {
      depth--;
      if (depth === 0) {
        return i + 1;
      }
    }
  }

  return -1;
}

/**
 * Extract embedded JSON from HTML by finding a variable assignment and using brace matching.
 *
 * Recognised forms are `window.NAME = {...}`, `window["NAME"] = {...}` and
 * `window['NAME'] = {...}`, with any amount of whitespace around `=`. Occurrences are
 * tried in document order. One that is not followed by an object literal, never closes,
 * or is not valid JSON is skipped, so an earlier mention such as `window.NAME == null`
 * does not hide the real assignment.
 *
 * @param {string} html - Raw HTML content
 * @param {string} variableName - The JavaScript variable name to find (e.g., '__INITIAL_STATE__', '__NEXT_DATA__')
 * @returns {Object|null} Parsed JSON object or null if not found/invalid
 */
function extractEmbeddedJson(html, variableName) {
  if (!html || typeof html !== 'string') {
    return null;
  }

  // The name is matched literally, so escape regex metacharacters in it.
  const name = String(variableName).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const assignment = new RegExp(`window(?:\\.${name}|\\[\\s*(["'])${name}\\1\\s*\\])\\s*=\\s*(?=\\{)`, 'g');

  for (const match of html.matchAll(assignment)) {
    const startIndex = match.index + match[0].length;
    const endIndex = findJsonObjectEnd(html, startIndex);
    if (endIndex === -1) {
      continue;
    }

    try {
      return JSON.parse(html.slice(startIndex, endIndex));
    } catch {
      // Not valid JSON (e.g. a JS object literal); try the next assignment.
    }
  }

  return null;
}
/*
 * Flatfox.ch provider test
 */
describe('#flatfox testsuite()', () => {
  it('should test flatfox provider', async () => {
    const Fredy = await mockFredy();
    provider.init(providerConfig.flatfox, [], []);

    const pipeline = new Fredy(provider.config, null, provider.metaInformation.id, 'flatfox', similarityCache);
    const listing = await pipeline.execute();
    expect(listing).to.be.a('array');

    const notificationObj = get();
    expect(notificationObj).to.be.a('object');
    expect(notificationObj.serviceName).to.equal('flatfox');

    notificationObj.payload.forEach((notify) => {
      /** check the actual structure **/
      for (const field of ['id', 'price', 'title', 'link']) {
        expect(notify[field]).to.be.a('string');
      }
      /** check the values if possible **/
      expect(notify.title).to.be.not.empty;
      expect(notify.link).that.does.include('https://www.flatfox.ch');
      // A non-empty price must contain a digit (empty is ok for "price on request" listings)
      if (notify.price) {
        expect(notify.price).to.match(/\d/);
      }
    });
  });
});

/*
 * ImmoScout24.ch URL helper tests.
 * Each entry is [test title, [[input, expected], ...]]; one `it` is registered per entry.
 */
const PROPERTY_TYPE_CASES = [
  ['should return "flat" for APARTMENT category', [[['APARTMENT'], 'flat']]],
  ['should return "flat" for FLAT category', [[['FLAT'], 'flat']]],
  ['should return "flat" for mixed apartment categories', [[['APARTMENT', 'FLAT'], 'flat']]],
  ['should return "house" for HOUSE category', [[['HOUSE'], 'house']]],
  ['should return "house" for VILLA category', [[['VILLA'], 'house']]],
  ['should return "house" for CHALET category', [[['CHALET'], 'house']]],
  ['should return "parking" for PARKING category', [[['PARKING'], 'parking']]],
  ['should return "parking" for GARAGE category', [[['GARAGE'], 'parking']]],
  ['should return "commercial" for OFFICE category', [[['OFFICE'], 'commercial']]],
  ['should return "commercial" for RETAIL category', [[['RETAIL'], 'commercial']]],
  ['should return "plot" for LAND category', [[['LAND'], 'plot']]],
  ['should return "property" for empty array', [[[], 'property']]],
  ['should return "property" for null input', [[null, 'property']]],
  ['should return "property" for undefined input', [[undefined, 'property']]],
  ['should return "property" for unknown category', [[['UNKNOWN_TYPE'], 'property']]],
  ['should handle lowercase categories', [[['apartment'], 'flat']]],
  ['should handle mixed case categories', [[['Apartment'], 'flat']]],
  ['should use first matching category', [[['UNKNOWN', 'HOUSE', 'APARTMENT'], 'house']]],
];

const LOCALITY_CASES = [
  ['should lowercase simple locality', [['Zurich', 'zurich']]],
  [
    'should replace German umlauts',
    [
      ['Zürich', 'zuerich'],
      ['Köln', 'koeln'],
      ['München', 'muenchen'],
    ],
  ],
  ['should replace ß with ss', [['Straße', 'strasse']]],
  [
    'should replace French accents',
    [
      ['Genève', 'geneve'],
      ['Neuchâtel', 'neuchatel'],
    ],
  ],
  [
    'should replace spaces with dashes',
    [
      ['St. Gallen', 'st-gallen'],
      ['Zürich HB', 'zuerich-hb'],
    ],
  ],
  ['should handle multiple spaces', [['New York', 'new-york']]],
  [
    'should remove special characters',
    [
      ['City (Center)', 'city-center'],
      ['Area/District', 'area-district'],
    ],
  ],
  [
    'should remove leading and trailing dashes',
    [
      [' Zurich ', 'zurich'],
      ['--Zurich--', 'zurich'],
    ],
  ],
  ['should return "switzerland" for null input', [[null, 'switzerland']]],
  ['should return "switzerland" for undefined input', [[undefined, 'switzerland']]],
  ['should return "switzerland" for empty string', [['', 'switzerland']]],
  [
    'should return "switzerland" for non-string input',
    [
      [123, 'switzerland'],
      [{}, 'switzerland'],
    ],
  ],
  [
    'should handle complex Swiss localities',
    [
      ['Bern Länggasse-Felsenau', 'bern-laenggasse-felsenau'],
      ['La Chaux-de-Fonds', 'la-chaux-de-fonds'],
    ],
  ],
];

// Register one mocha test per table entry for the given helper.
const registerCases = (helper, table) => {
  for (const [title, samples] of table) {
    it(title, () => {
      for (const [input, expected] of samples) {
        expect(helper(input)).to.equal(expected);
      }
    });
  }
};

describe('#immoscout24ch URL helpers', () => {
  describe('getPropertyTypeSlug()', () => {
    registerCases(getPropertyTypeSlug, PROPERTY_TYPE_CASES);
  });

  describe('sanitizeLocality()', () => {
    registerCases(sanitizeLocality, LOCALITY_CASES);
  });
});
"flatfox": { + "url": "https://www.flatfox.ch/en/search/?east=7.533549&north=47.024424&object_category=APARTMENT&offer_type=RENT&south=46.909588&west=7.318974", + "enabled": true } } diff --git a/ui/src/components/grid/listings/ListingsGrid.jsx b/ui/src/components/grid/listings/ListingsGrid.jsx index d555db21..79991a0c 100644 --- a/ui/src/components/grid/listings/ListingsGrid.jsx +++ b/ui/src/components/grid/listings/ListingsGrid.jsx @@ -282,7 +282,7 @@ const ListingsGrid = () => { } size="small"> - {item.price} € + {item.price} {providers?.find((p) => p.id === item.provider)?.currency || '€'} { {item.distance_to_destination ? ( }> - {item.distance_to_destination} m to chosen address + {item.distance_to_destination >= 1000 + ? `${(item.distance_to_destination / 1000).toFixed(1)} km` + : `${Math.round(item.distance_to_destination)} m`}{' '} + to chosen address ) : ( }> diff --git a/ui/src/views/dashboard/Dashboard.jsx b/ui/src/views/dashboard/Dashboard.jsx index 63ad72cd..c086068e 100644 --- a/ui/src/views/dashboard/Dashboard.jsx +++ b/ui/src/views/dashboard/Dashboard.jsx @@ -137,8 +137,8 @@ export default function Dashboard() { !kpis.avgPriceOfListings ? 
'---' : new Intl.NumberFormat('de-DE', { - style: 'currency', - currency: 'EUR', + style: 'decimal', + maximumFractionDigits: 0, }).format(kpis.avgPriceOfListings) }`} icon={} diff --git a/ui/src/views/generalSettings/GeneralSettings.jsx b/ui/src/views/generalSettings/GeneralSettings.jsx index 084c2416..ef53c27b 100644 --- a/ui/src/views/generalSettings/GeneralSettings.jsx +++ b/ui/src/views/generalSettings/GeneralSettings.jsx @@ -25,6 +25,7 @@ import { IconLineChartStroked, IconSearch, IconFolder, + IconLock, } from '@douyinfe/semi-icons'; import './GeneralSettings.less'; @@ -57,6 +58,8 @@ const GeneralSettings = function GeneralSettings() { const [demoMode, setDemoMode] = React.useState(null); const [analyticsEnabled, setAnalyticsEnabled] = React.useState(null); const [sqlitePath, setSqlitePath] = React.useState(null); + const [brightDataApiToken, setBrightDataApiToken] = React.useState(''); + const [brightDataZone, setBrightDataZone] = React.useState(''); const fileInputRef = React.useRef(null); const [restoreModalVisible, setRestoreModalVisible] = React.useState(false); const [precheckInfo, setPrecheckInfo] = React.useState(null); @@ -81,6 +84,8 @@ const GeneralSettings = function GeneralSettings() { setAnalyticsEnabled(settings?.analyticsEnabled || false); setDemoMode(settings?.demoMode || false); setSqlitePath(settings?.sqlitepath); + setBrightDataApiToken(settings?.brightDataApiToken || ''); + setBrightDataZone(settings?.brightDataZone || ''); } init(); @@ -119,6 +124,8 @@ const GeneralSettings = function GeneralSettings() { demoMode, analyticsEnabled, sqlitepath: sqlitePath, + brightDataApiToken: brightDataApiToken || null, + brightDataZone: brightDataZone || null, }); } catch (exception) { console.error(exception); @@ -335,6 +342,50 @@ const GeneralSettings = function GeneralSettings() { + + When do you need this? + } + style={{ marginBottom: '1rem' }} + description={ +
+ Bright Data is only required for Swiss providers that use geo-restricted APIs or have bot protection + (DataDome). German providers work without it. You can get API credentials at{' '} + + brightdata.com + + . +
+ } + /> +
+ setBrightDataApiToken(value)} + insetLabel="API Token" + /> + setBrightDataZone(value)} + insetLabel="Zone" + /> +
+
+ + + state.provider); + const providerCurrency = providersList?.find((p) => p.id === listing.provider)?.currency || '€'; + const data = [ { key: 'Job', @@ -269,7 +272,7 @@ export default function ListingDetail() { value: listing.provider.charAt(0).toUpperCase() + listing.provider.slice(1), Icon: , }, - { key: 'Price', value: `${listing.price} €`, Icon: }, + { key: 'Price', value: `${listing.price} ${providerCurrency}`, Icon: }, { key: 'Size', value: listing.size ? `${listing.size} m²` : 'N/A', @@ -361,7 +364,11 @@ export default function ListingDetail() { Distance to home: - {listing.distance_to_destination} m + + {listing.distance_to_destination >= 1000 + ? `${(listing.distance_to_destination / 1000).toFixed(1)} km` + : `${Math.round(listing.distance_to_destination)} m`} + )} diff --git a/ui/src/views/listings/Map.jsx b/ui/src/views/listings/Map.jsx index 40f32389..0249ff71 100644 --- a/ui/src/views/listings/Map.jsx +++ b/ui/src/views/listings/Map.jsx @@ -23,9 +23,11 @@ import ListingDeletionModal from '../../components/ListingDeletionModal.jsx'; const { Text } = Typography; -const GERMANY_BOUNDS = [ - [5.866, 47.27], // Southwest coordinates - [15.042, 55.059], // Northeast coordinates +// Default bounds covering central Europe (Germany + Switzerland + Austria). +// The map auto-fits to listing coordinates when available. +const DEFAULT_BOUNDS = [ + [5.866, 45.818], // Southwest (covers Switzerland) + [16.6, 55.059], // Northeast (covers Germany) ]; const STYLES = { @@ -142,9 +144,8 @@ export default function MapView() { map.current = new maplibregl.Map({ container: mapContainer.current, style: STYLES[style], - center: [10.4515, 51.1657], // Center of Germany - zoom: 4, - maxBounds: GERMANY_BOUNDS, + bounds: DEFAULT_BOUNDS, + fitBoundsOptions: { padding: 20 }, antialias: true, }); @@ -396,7 +397,7 @@ export default function MapView() { />

${listing.title}

- Price: ${listing.price ? listing.price + ' €' : 'N/A'} + Price: ${listing.price || 'N/A'} Address: ${listing.address || 'N/A'} Job: ${listing.job_name || 'N/A'} Provider: ${capitalizedProvider} @@ -527,12 +528,12 @@ export default function MapView() {
- Price Range (€): + Price Range:
- {priceRange[0]} € - {priceRange[1]} € + {priceRange[0]} + {priceRange[1]}
{ setPriceRange(val); }} - tipFormatter={(val) => `${val} €`} + tipFormatter={(val) => `${val}`} />
diff --git a/ui/src/views/userSettings/UserSettings.jsx b/ui/src/views/userSettings/UserSettings.jsx index 939a7829..060b82f5 100644 --- a/ui/src/views/userSettings/UserSettings.jsx +++ b/ui/src/views/userSettings/UserSettings.jsx @@ -64,7 +64,7 @@ const UserSettings = () => { return (