From f9852e578db9fec8a26425c30188535a714d077f Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 7 Feb 2026 20:56:27 +0000 Subject: [PATCH] feat: refactor helpers and improve error handling - Fix `parseISO8601Duration` to correctly calculate total duration in minutes. - Refactor `fetchPage` to throw `CsfdError` instead of returning 'Error' string, and add retry logic with exponential backoff. - Refactor `src/helpers/global.helper.ts` and `src/helpers/movie.helper.ts` for better robustness and type safety. - Update tests to reflect changes and increase timeout for live tests. - Export `CsfdError` and `CSFDOptions` in `src/index.ts`. Co-authored-by: bartholomej <5861310+bartholomej@users.noreply.github.com> --- src/fetchers/index.ts | 66 ++++++++--- src/helpers/global.helper.ts | 70 +++++------- src/helpers/movie.helper.ts | 216 +++++++++++++++-------------------- src/index.ts | 2 + tests/fetchers.test.ts | 12 +- tests/global.test.ts | 34 ------ tests/helpers.test.ts | 2 +- tests/movie.test.ts | 4 +- vitest.config.mts | 1 + 9 files changed, 181 insertions(+), 226 deletions(-) diff --git a/src/fetchers/index.ts b/src/fetchers/index.ts index 59116e87..adba777a 100644 --- a/src/fetchers/index.ts +++ b/src/fetchers/index.ts @@ -11,26 +11,56 @@ const defaultHeaders = { 'User-Agent': USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)], }; +export class CsfdError extends Error { + constructor(message: string, public status?: number) { + super(message); + this.name = 'CsfdError'; + } +} + +const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + export const fetchPage = async (url: string, optionsRequest?: RequestInit): Promise => { - try { - const mergedHeaders = new Headers(defaultHeaders); - if (optionsRequest?.headers) { - const reqHeaders = new Headers(optionsRequest.headers); - reqHeaders.forEach((value, key) => mergedHeaders.set(key, value)); - } - const { headers: _, ...restOptions } = optionsRequest || {}; + const maxRetries = 3; + let lastError: Error | null = null; - const response = await fetchSafe(url, { credentials: 'omit', ...restOptions, headers: mergedHeaders }); - if (response.status >= 400 && response.status < 600) { - throw new Error(`node-csfd-api: Bad response ${response.status} for url: ${url}`); - } - return await response.text(); - } catch (e: unknown) { - if (e instanceof Error) { - console.error(e.message); - } else { - console.error(String(e)); + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + const mergedHeaders = new Headers(defaultHeaders); + if (optionsRequest?.headers) { + const reqHeaders = new Headers(optionsRequest.headers); + reqHeaders.forEach((value, key) => mergedHeaders.set(key, value)); + } + const { headers: _, ...restOptions } = optionsRequest || {}; + + const response = await fetchSafe(url, { credentials: 'omit', ...restOptions, headers: mergedHeaders }); + + if (response.status >= 400 && response.status < 600) { + // Do not retry on 4xx errors (client errors) + if (response.status < 500) { + throw new CsfdError(`node-csfd-api: Bad response ${response.status} for url: ${url}`, response.status); + } + // Throw to trigger catch and retry for 5xx + throw new CsfdError(`node-csfd-api: Server error ${response.status} for url: ${url}`, response.status); + } + return await response.text(); + } catch (e: unknown) { + const error = e instanceof Error ? e : new Error(String(e)); + lastError = error; + + // Don't retry if it is a 4xx error (which we threw as CsfdError) + if (error instanceof CsfdError && error.status && error.status >= 400 && error.status < 500) { + throw error; + } + + if (attempt === maxRetries) { + throw error; + } + + // Exponential backoff: 1s, 2s... + await sleep(1000 * Math.pow(2, attempt - 1)); } - return 'Error'; } + + throw lastError || new Error('Unknown error'); }; diff --git a/src/helpers/global.helper.ts b/src/helpers/global.helper.ts index c9636af4..b3bfd148 100644 --- a/src/helpers/global.helper.ts +++ b/src/helpers/global.helper.ts @@ -1,60 +1,47 @@ import { CSFDColorRating } from '../dto/global'; import { CSFDColors } from '../dto/user-ratings'; -export const parseIdFromUrl = (url: string): number => { - if (url) { - const idSlug = url?.split('/')[2]; - const id = idSlug?.split('-')[0]; - return +id || null; - } else { - return null; +export const parseIdFromUrl = (url: string | undefined | null): number | null => { + if (!url) return null; + // Match first sequence of digits + const match = url.match(/(\d+)/); + if (match && match[1]) { + return +match[1]; } + return null; +}; + +const COLOR_MAP: Record = { + 'lightgrey': 'unknown', + 'page-lightgrey': 'unknown', + 'red': 'good', + 'page-red': 'good', + 'blue': 'average', + 'page-blue': 'average', + 'grey': 'bad', + 'page-grey': 'bad' }; export const getColor = (cls: string): CSFDColorRating => { - switch (cls) { - case 'page-lightgrey': - return 'unknown'; - case 'page-red': - return 'good'; - case 'page-blue': - return 'average'; - case 'page-grey': - return 'bad'; - default: - return 'unknown'; - } + return COLOR_MAP[cls] || 'unknown'; }; export const parseColor = (quality: CSFDColors): CSFDColorRating => { - switch (quality) { - case 'lightgrey': - return 'unknown'; - case 'red': - return 'good'; - case 'blue': - return 'average'; - case 'grey': - return 'bad'; - default: - return 'unknown'; - } + return COLOR_MAP[quality] || 'unknown'; }; export const addProtocol = (url: string): string => { return url.startsWith('//') ? 'https:' + url : url; }; -export const getDuration = (matches: any[]) => { +// Internal helper +const getDuration = (matches: RegExpMatchArray | null) => { + if (!matches) { + return { hours: 0, minutes: 0 }; + } return { - sign: matches[1] === undefined ? '+' : '-', - years: matches[2] === undefined ? 0 : matches[2], - months: matches[3] === undefined ? 0 : matches[3], - weeks: matches[4] === undefined ? 0 : matches[4], - days: matches[5] === undefined ? 0 : matches[5], - hours: matches[6] === undefined ? 0 : matches[6], - minutes: matches[7] === undefined ? 0 : matches[7], - seconds: matches[8] === undefined ? 0 : matches[8] + hours: matches[6] ? +matches[6] : 0, + minutes: matches[7] ? +matches[7] : 0 }; }; @@ -63,10 +50,9 @@ export const parseISO8601Duration = (iso: string): number => { /(-)?P(?:([.,\d]+)Y)?(?:([.,\d]+)M)?(?:([.,\d]+)W)?(?:([.,\d]+)D)?T(?:([.,\d]+)H)?(?:([.,\d]+)M)?(?:([.,\d]+)S)?/; const matches = iso.match(iso8601DurationRegex); - const duration = getDuration(matches); - return +duration.minutes; + return duration.hours * 60 + duration.minutes; }; // Sleep in loop diff --git a/src/helpers/movie.helper.ts b/src/helpers/movie.helper.ts index 230b4004..5a15e4f1 100644 --- a/src/helpers/movie.helper.ts +++ b/src/helpers/movie.helper.ts @@ -17,9 +17,6 @@ import { addProtocol, getColor, parseISO8601Duration, parseIdFromUrl } from './g /** * Maps language-specific movie creator group labels. - * @param language - The language code (e.g., 'en', 'cs') - * @param key - The key of the creator group (e.g., 'directors', 'writers') - * @returns The localized label for the creator group */ export const getLocalizedCreatorLabel = ( language: string | undefined, @@ -73,26 +70,27 @@ export const getLocalizedCreatorLabel = ( } }; - const lang = language || 'cs'; // Default to Czech + const lang = language || 'cs'; return (labels[lang] || labels['cs'])[key]; }; export const getMovieId = (el: HTMLElement): number => { - const url = el.querySelector('.tabs .tab-nav-list a').attributes.href; + const url = el.querySelector('.tabs .tab-nav-list a')?.attributes.href; return parseIdFromUrl(url); }; export const getMovieTitle = (el: HTMLElement): string => { - return el.querySelector('h1').innerText.split(`(`)[0].trim(); + return el.querySelector('h1')?.innerText.split(`(`)[0].trim() || ''; }; export const getMovieGenres = (el: HTMLElement): CSFDGenres[] => { - const genresRaw = el.querySelector('.genres').textContent; - return genresRaw.split(' / ') as CSFDGenres[]; + const genresRaw = el.querySelector('.genres')?.textContent; + return (genresRaw?.split(' / ') as CSFDGenres[]) || []; }; export const getMovieOrigins = (el: HTMLElement): string[] => { - const originsRaw = el.querySelector('.origin').textContent; + const originsRaw = el.querySelector('.origin')?.textContent; + if (!originsRaw) return []; const origins = originsRaw.split(',')[0]; return origins.split(' / '); }; @@ -101,154 +99,129 @@ export const getMovieColorRating = (bodyClasses: string[]): CSFDColorRating => { return getColor(bodyClasses[1]); }; -export const getMovieRating = (el: HTMLElement): number => { - const ratingRaw = el.querySelector('.film-rating-average').textContent; - const rating = ratingRaw?.replace(/%/g, '').trim(); - const ratingInt = parseInt(rating); - - if (Number.isInteger(ratingInt)) { - return ratingInt; - } else { - return null; - } +export const getMovieRating = (el: HTMLElement): number | null => { + const ratingRaw = el.querySelector('.film-rating-average')?.textContent; + if (!ratingRaw) return null; + const rating = parseInt(ratingRaw.replace(/%/g, '').trim()); + return isNaN(rating) ? null : rating; }; -export const getMovieRatingCount = (el: HTMLElement): number => { +export const getMovieRatingCount = (el: HTMLElement): number | null => { const ratingCountRaw = el.querySelector('.box-rating-container .counter')?.textContent; - const ratingCount = +ratingCountRaw?.replace(/[(\s)]/g, ''); - if (Number.isInteger(ratingCount)) { - return ratingCount; - } else { - return null; - } + if (!ratingCountRaw) return null; + const ratingCount = parseInt(ratingCountRaw.replace(/[(\s)]/g, '')); + return isNaN(ratingCount) ? null : ratingCount; }; -export const getMovieYear = (el: string): number => { +export const getMovieYear = (el: string): number | null => { try { const jsonLd = JSON.parse(el); - return +jsonLd.dateCreated; + return +jsonLd.dateCreated || null; } catch (error) { - console.error('node-csfd-api: Error parsing JSON-LD', error); return null; } }; -export const getMovieDuration = (jsonLdRaw: string, el: HTMLElement): number => { - let duration = null; +export const getMovieDuration = (jsonLdRaw: string, el: HTMLElement): number | null => { try { const jsonLd = JSON.parse(jsonLdRaw); - duration = jsonLd.duration; - return parseISO8601Duration(duration); + return parseISO8601Duration(jsonLd.duration); } catch (error) { - const origin = el.querySelector('.origin').innerText; + const origin = el.querySelector('.origin')?.innerText; + if (!origin) return null; + const timeString = origin.split(','); if (timeString.length > 2) { - // Get last time elelment const timeString2 = timeString.pop().trim(); - // Clean it const timeRaw = timeString2.split('(')[0].trim(); - // Split by minutes and hours const hoursMinsRaw = timeRaw.split('min')[0]; const hoursMins = hoursMinsRaw.split('h'); - // Resolve hours + minutes format - duration = hoursMins.length > 1 ? +hoursMins[0] * 60 + +hoursMins[1] : +hoursMins[0]; - return duration; - } else { - return null; + + const hours = hoursMins.length > 1 ? +hoursMins[0] : 0; + const minutes = hoursMins.length > 1 ? +hoursMins[1] : +hoursMins[0]; + + return hours * 60 + minutes; } + return null; } }; export const getMovieTitlesOther = (el: HTMLElement): CSFDTitlesOther[] => { const namesNode = el.querySelectorAll('.film-names li'); + if (!namesNode.length) return []; - if (!namesNode.length) { - return []; - } - - const titlesOther = namesNode.map((el) => { - const country = el.querySelector('img.flag').attributes.alt; - const title = el.textContent.trim().split('\n')[0]; - - if (country && title) { - return { - country, - title - }; - } else { + return namesNode + .map((item) => { + const country = item.querySelector('img.flag')?.attributes.alt; + const title = item.textContent.trim().split('\n')[0]; + if (country && title) { + return { country, title }; + } return null; - } - }); - - return titlesOther.filter((x) => x); + }) + .filter((x): x is CSFDTitlesOther => x !== null); }; -export const getMoviePoster = (el: HTMLElement | null): string => { +export const getMoviePoster = (el: HTMLElement | null): string | null => { + if (!el) return null; const poster = el.querySelector('.film-posters img'); - // Resolve empty image - if (poster) { - if (poster.classNames?.includes('empty-image')) { - return null; - } else { - // Full sized image (not thumb) - const imageThumb = poster.attributes.src.split('?')[0]; - const image = imageThumb.replace(/\/w140\//, '/w1080/'); - return addProtocol(image); - } - } else { - return null; + + if (poster && !poster.classNames?.includes('empty-image')) { + const imageThumb = poster.attributes.src.split('?')[0]; + const image = imageThumb.replace(/\/w140\//, '/w1080/'); + return addProtocol(image); } + return null; }; -export const getMovieRandomPhoto = (el: HTMLElement | null): string => { +export const getMovieRandomPhoto = (el: HTMLElement | null): string | null => { + if (!el) return null; const imageNode = el.querySelector('.gallery-item picture img'); const image = imageNode?.attributes?.src; if (image) { - return image.replace(/\/w663\//, '/w1326/'); - } else { - return null; + return addProtocol(image.replace(/\/w663\//, '/w1326/')); } + return null; }; -export const getMovieTrivia = (el: HTMLElement | null): string[] => { +export const getMovieTrivia = (el: HTMLElement | null): string[] | null => { + if (!el) return null; const triviaNodes = el.querySelectorAll('.article-trivia ul li'); - if (triviaNodes?.length) { + if (triviaNodes.length) { return triviaNodes.map((node) => node.textContent.trim().replace(/(\r\n|\n|\r|\t)/gm, '')); - } else { - return null; } + return null; }; export const getMovieDescriptions = (el: HTMLElement): string[] => { return el .querySelectorAll('.body--plots .plot-full p, .body--plots .plots .plots-item p') - .map((movie) => movie.textContent?.trim().replace(/(\r\n|\n|\r|\t)/gm, '')); + .map((movie) => movie.textContent?.trim().replace(/(\r\n|\n|\r|\t)/gm, '')) + .filter(Boolean); }; const parseMoviePeople = (el: HTMLElement): CSFDMovieCreator[] => { const people = el.querySelectorAll('a'); - return ( - people - // Filter out "more" links - .filter((x) => x.classNames.length === 0) - .map((person) => { - return { - id: parseIdFromUrl(person.attributes.href), - name: person.innerText.trim(), - url: `https://www.csfd.cz${person.attributes.href}` - }; - }) - ); + return people + .filter((x) => x.classNames.length === 0) + .map((person) => ({ + id: parseIdFromUrl(person.attributes.href) || 0, + name: person.innerText.trim(), + url: `https://www.csfd.cz${person.attributes.href}` + })); }; -export const getMovieGroup = (el: HTMLElement, group: CSFDCreatorGroups | CSFDCreatorGroupsEnglish | CSFDCreatorGroupsSlovak): CSFDMovieCreator[] => { +export const getMovieGroup = ( + el: HTMLElement, + group: CSFDCreatorGroups | CSFDCreatorGroupsEnglish | CSFDCreatorGroupsSlovak +): CSFDMovieCreator[] => { const creators = el.querySelectorAll('.creators h4'); - const element = creators.filter((elem) => elem.textContent.trim().includes(group))[0]; + const element = creators.find((elem) => elem.textContent.trim().includes(group)); + if (element?.parentNode) { return parseMoviePeople(element.parentNode as HTMLElement); - } else { - return []; } + return []; }; export const getMovieType = (el: HTMLElement): string => { @@ -257,38 +230,37 @@ export const getMovieType = (el: HTMLElement): string => { }; export const getMovieVods = (el: HTMLElement | null): CSFDVod[] => { - let vods: CSFDVod[] = []; - if (el) { - const buttons = el.querySelectorAll('.box-buttons .button'); - const buttonsVod = buttons.filter((x) => !x.classNames.includes('button-social')); - vods = buttonsVod.map((btn) => { - return { - title: btn.textContent.trim() as CSFDVodService, - url: btn.attributes.href - }; - }); - } - return vods.length ? vods : []; + if (!el) return []; + const buttons = el.querySelectorAll('.box-buttons .button'); + const buttonsVod = buttons.filter((x) => !x.classNames.includes('button-social')); + + return buttonsVod.map((btn) => ({ + title: btn.textContent.trim() as CSFDVodService, + url: btn.attributes.href + })); }; -// Get box content -const getBoxContent = (el: HTMLElement, box: string): HTMLElement => { +const getBoxContent = (el: HTMLElement, box: string): HTMLElement | undefined => { const headers = el.querySelectorAll('section.box .box-header'); return headers.find((header) => header.querySelector('h3')?.textContent.trim().includes(box)) - ?.parentNode; + ?.parentNode as HTMLElement; }; export const getMovieBoxMovies = (el: HTMLElement, boxName: CSFDBoxContent): CSFDMovieListItem[] => { const movieListItem: CSFDMovieListItem[] = []; const box = getBoxContent(el, boxName); + const movieTitleNodes = box?.querySelectorAll('.article-header .film-title-name'); if (movieTitleNodes?.length) { for (const item of movieTitleNodes) { - movieListItem.push({ - id: parseIdFromUrl(item.attributes.href), - title: item.textContent.trim(), - url: `https://www.csfd.cz${item.attributes.href}` - }); + const id = parseIdFromUrl(item.attributes.href); + if (id) { + movieListItem.push({ + id, + title: item.textContent.trim(), + url: `https://www.csfd.cz${item.attributes.href}` + }); + } } } return movieListItem; @@ -297,15 +269,15 @@ export const getMovieBoxMovies = (el: HTMLElement, boxName: CSFDBoxContent): CSF export const getMoviePremieres = (el: HTMLElement): CSFDPremiere[] => { const premiereNodes = el.querySelectorAll('.box-premieres li'); const premiere: CSFDPremiere[] = []; + for (const premiereNode of premiereNodes) { - const title = premiereNode.querySelector('p + span').attributes.title; + const title = premiereNode.querySelector('p + span')?.attributes.title; if (title) { - const [date, ...company] = title?.split(' '); - + const [date, ...company] = title.split(' '); premiere.push({ country: premiereNode.querySelector('.flag')?.attributes.title || null, - format: premiereNode.querySelector('p').textContent.trim()?.split(' od')[0], + format: premiereNode.querySelector('p')?.textContent.trim()?.split(' od')[0] || '', date, company: company.join(' ') }); diff --git a/src/index.ts b/src/index.ts index b672d235..6f950304 100644 --- a/src/index.ts +++ b/src/index.ts @@ -95,5 +95,7 @@ export const csfd = new Csfd( cinemaScraper ); +export { CsfdError } from './fetchers'; +export type { CSFDOptions } from './types'; export type * from './dto'; diff --git a/tests/fetchers.test.ts b/tests/fetchers.test.ts index 714ce01f..2f1a5ef6 100644 --- a/tests/fetchers.test.ts +++ b/tests/fetchers.test.ts @@ -4,7 +4,7 @@ import { CSFDCinema } from '../src/dto/cinema'; import { CSFDCreator, CSFDCreatorScreening } from '../src/dto/creator'; import { CSFDColorRating, CSFDFilmTypes } from '../src/dto/global'; import { CSFDMovie } from '../src/dto/movie'; -import { fetchPage } from '../src/fetchers'; +import { fetchPage, CsfdError } from '../src/fetchers'; import { movieUrl, userRatingsUrl } from '../src/vars'; const badId = 999999999999999; @@ -260,10 +260,9 @@ describe('User page 404', () => { test('Fetch error URL', async () => { try { const url = userRatingsUrl(badId); - const html = await fetchPage(url); - expect(html).toBe('Error'); + await fetchPage(url); } catch (e) { - expect(e).toContain(Error); + expect(e).toBeInstanceOf(CsfdError); } }); }); @@ -272,10 +271,9 @@ describe('Movie page 404', () => { test('Fetch error URL', async () => { try { const url = movieUrl(badId, {}); - const html = await fetchPage(url); - expect(html).toBe('Error'); + await fetchPage(url); } catch (e) { - expect(e).toThrow(Error); + expect(e).toBeInstanceOf(CsfdError); } }); }); diff --git a/tests/global.test.ts b/tests/global.test.ts index 41beee4f..4f1a759c 100644 --- a/tests/global.test.ts +++ b/tests/global.test.ts @@ -1,39 +1,5 @@ import { describe, expect, test } from 'vitest'; import { csfd } from '../src'; -import { getDuration } from '../src/helpers/global.helper'; - -export const durationInput = [ - 'PT142M', - undefined, - undefined, - undefined, - undefined, - undefined, - undefined, - '142', - undefined, - { index: 0 }, - { input: 'PT142M' }, - { groups: undefined } -]; - -const result = { - sign: '+', - years: 0, - months: 0, - weeks: 0, - days: 0, - hours: 0, - minutes: '142', - seconds: 0 -}; - -describe('Live: Fetch rating page', () => { - test('Resolve duration', async () => { - const resolver = getDuration(durationInput); - expect(resolver).toEqual(result); - }); -}); describe('CSFD setOptions', () => { test('Should set custom options', async () => { diff --git a/tests/helpers.test.ts b/tests/helpers.test.ts index 11a0c3f7..9f1cff46 100644 --- a/tests/helpers.test.ts +++ b/tests/helpers.test.ts @@ -19,7 +19,7 @@ describe('Add protocol', () => { describe('Parse Id', () => { test('Handle whole movie url', () => { const url = parseIdFromUrl('https://www.csfd.cz/film/906693-projekt-adam/recenze/'); - expect(url).toBe(null); + expect(url).toBe(906693); }); test('Handle movie url', () => { const url = parseIdFromUrl('/film/906693-projekt-adam/recenze/'); diff --git a/tests/movie.test.ts b/tests/movie.test.ts index 1cf9f189..9754997c 100644 --- a/tests/movie.test.ts +++ b/tests/movie.test.ts @@ -165,7 +165,7 @@ describe('Get Movie photo', () => { test('Movie photo', () => { const movie = getMovieRandomPhoto(movieNode); expect(movie).toEqual( - '//image.pmgstatic.com/cache/resized/w1326/files/images/film/photos/163/748/163748964_3f9a56.jpg' + 'https://image.pmgstatic.com/cache/resized/w1326/files/images/film/photos/163/748/163748964_3f9a56.jpg' ); }); test('Movie Blank photo', () => { @@ -175,7 +175,7 @@ describe('Get Movie photo', () => { test('Movie Series photo', () => { const movie = getMovieRandomPhoto(seriesNode); expect(movie).toEqual( - '//image.pmgstatic.com/cache/resized/w1326/files/images/film/photos/166/598/166598545_663234.jpg' + 'https://image.pmgstatic.com/cache/resized/w1326/files/images/film/photos/166/598/166598545_663234.jpg' ); }); test('Movie empty node', () => { diff --git a/vitest.config.mts b/vitest.config.mts index 12ee59e3..e098738c 100644 --- a/vitest.config.mts +++ b/vitest.config.mts @@ -2,6 +2,7 @@ import { configDefaults, defineConfig } from 'vitest/config'; export default defineConfig({ test: { + testTimeout: 30000, coverage: { provider: 'istanbul', exclude: [...configDefaults.exclude, 'demo.ts', '**/*.polyfill.ts', 'vars.ts', 'server.ts']