Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
},
"dependencies": {
"cross-fetch": "^4.1.0",
"node-html-parser": "^7.0.2"
"node-html-parser": "^7.0.2",
"valibot": "^1.2.0"
},
"devDependencies": {
"@babel/preset-typescript": "^7.28.5",
Expand Down Expand Up @@ -98,8 +99,8 @@
"types": "./dist/index.d.ts",
"exports": {
".": {
"require": "./dist/index.js",
"import": "./dist/index.mjs"
"import": "./dist/index.mjs",
"require": "./dist/index.js"
},
"./package.json": "./package.json"
},
Expand Down
42 changes: 42 additions & 0 deletions src/dto/user-ratings.schema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import * as v from 'valibot';

/**
 * Runtime schema for a title's colour rating.
 *
 * Accepts exactly one of the string literals `'bad'`, `'average'`, `'good'`
 * or `'unknown'`; any other value is reported as a validation issue.
 */
export const CSFDColorRatingSchema = v.union([
v.literal('bad'),
v.literal('average'),
v.literal('good'),
v.literal('unknown')
]);

/**
 * Runtime schema for a user's star rating.
 *
 * Accepts only the numeric literals 0, 1, 2, 3, 4 and 5. Numeric strings
 * such as `'3'` and fractional values are not members of the union.
 */
export const CSFDStarsSchema = v.union([
v.literal(0),
v.literal(1),
v.literal(2),
v.literal(3),
v.literal(4),
v.literal(5)
]);

/**
 * Runtime schema for the kind of title a rating or review refers to.
 *
 * Accepts exactly the string literals listed below (matched verbatim,
 * including diacritics and case). A label that is not in this list fails
 * validation, so new site-side types must be added here explicitly.
 */
export const CSFDFilmTypesSchema = v.union([
v.literal('film'),
v.literal('TV film'),
v.literal('pořad'),
v.literal('seriál'),
v.literal('divadelní záznam'),
v.literal('koncert'),
v.literal('série'),
v.literal('studentský film'),
v.literal('amatérský film'),
v.literal('hudební videoklip'),
v.literal('epizoda')
]);

/**
 * Runtime schema for a single scraped user-rating entry.
 *
 * Every field is required. `type`, `colorRating` and `userRating` are
 * restricted to the literal unions defined above; the remaining fields are
 * checked for primitive type only.
 */
export const CSFDUserRatingsSchema = v.object({
id: v.number(),
title: v.string(),
year: v.number(),
// Only checked to be a string; the URL format itself is not validated.
url: v.string(),
type: CSFDFilmTypesSchema,
colorRating: CSFDColorRatingSchema,
userRating: CSFDStarsSchema,
// Only checked to be a string; no date format is enforced here.
userDate: v.string()
});
15 changes: 15 additions & 0 deletions src/dto/user-reviews.schema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import * as v from 'valibot';
import { CSFDColorRatingSchema, CSFDFilmTypesSchema, CSFDStarsSchema } from './user-ratings.schema';

/**
 * Runtime schema for a single scraped user-review entry.
 *
 * All keys are required. `type`, `colorRating` and `userRating` reuse the
 * literal-union schemas imported from `./user-ratings.schema`; `poster` is
 * the only field that may be `null`.
 */
export const CSFDUserReviewsSchema = v.object({
id: v.number(),
title: v.string(),
year: v.number(),
// Only checked to be a string; the URL format itself is not validated.
url: v.string(),
type: CSFDFilmTypesSchema,
colorRating: CSFDColorRatingSchema,
userRating: CSFDStarsSchema,
// Only checked to be a string; no date format is enforced here.
userDate: v.string(),
text: v.string(),
// Must be present, but may be null as well as a string.
poster: v.nullable(v.string())
});
2 changes: 1 addition & 1 deletion src/dto/user-reviews.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export interface CSFDUserReviews extends CSFDScreening {
userRating: CSFDStars;
userDate: string; // TODO datetime
text: string;
poster: string;
poster: string | null;
}

export interface CSFDUserReviewsConfig {
Expand Down
9 changes: 8 additions & 1 deletion src/helpers/user-reviews.helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@ export const getUserReviewType = (el: HTMLElement): CSFDFilmTypes => {
// Type can be in the second .info span (e.g., "(seriál)") // TODO need more tests
const typeText = el.querySelectorAll('.film-title-info .info');

return (typeText.length > 1 ? typeText[1].text.slice(1, -1) : 'film') as CSFDFilmTypes;
if (typeText.length > 1) {
const text = typeText[1].text.trim();
if (text.startsWith('(') && text.endsWith(')')) {
return text.slice(1, -1) as CSFDFilmTypes;
}
return text as CSFDFilmTypes;
}
return 'film';
};

export const getUserReviewTitle = (el: HTMLElement): string => {
Expand Down
34 changes: 27 additions & 7 deletions src/services/user-ratings.service.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { HTMLElement, parse } from 'node-html-parser';
import { flatten, safeParse } from 'valibot';
import { CSFDColorRating, CSFDStars } from '../dto/global';
import { CSFDUserRatingConfig, CSFDUserRatings } from '../dto/user-ratings';
import { CSFDUserRatingsSchema } from '../dto/user-ratings.schema';
import { fetchPage } from '../fetchers';
import { sleep } from '../helpers/global.helper';
import {
Expand Down Expand Up @@ -76,19 +78,37 @@ export class UserRatingsScraper {
for (const el of movies) {
const type = getUserRatingType(el);

let shouldProcess = true;

// Filtering includesOnly
if (config?.includesOnly?.length) {
if (config.includesOnly.some((include) => type === include)) {
films.push(this.buildUserRatings(el));
if (!config.includesOnly.some((include) => type === include)) {
shouldProcess = false;
}
// Filter excludes
} else if (config?.excludes?.length) {
if (!config.excludes.some((exclude) => type === exclude)) {
films.push(this.buildUserRatings(el));
if (config.excludes.some((exclude) => type === exclude)) {
shouldProcess = false;
}
}

if (shouldProcess) {
try {
const item = this.buildUserRatings(el);
const result = safeParse(CSFDUserRatingsSchema, item);
if (result.success) {
films.push(result.output as CSFDUserRatings);
} else {
console.warn(
`Skipping invalid user rating. Title: ${item.title}, ID: ${item.id}`,
JSON.stringify(flatten(result.issues))
);
}
} catch (e) {
console.warn(
`Skipping user rating due to scraping error (DOM change?): ${(e as Error).message}`
);
}
} else {
// Without filtering
films.push(this.buildUserRatings(el));
}
Comment on lines 78 to 112
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

getUserRatingType(el) on line 79 is outside the try/catch, so a DOM extraction error there will still abort the entire loop.

The try/catch on lines 96–111 protects buildUserRatings, but getUserRatingType(el) at line 79 performs the same kind of DOM scraping and can throw identically. An error there skips the catch block and propagates up, aborting all remaining items on the page.

Move the getUserRatingType call inside the try block (or wrap the entire loop body), and pass type into buildUserRatings to avoid calling it twice.

Proposed fix
     for (const el of movies) {
-      const type = getUserRatingType(el);
-
-      let shouldProcess = true;
-
-      // Filtering includesOnly
-      if (config?.includesOnly?.length) {
-        if (!config.includesOnly.some((include) => type === include)) {
-          shouldProcess = false;
-        }
-        // Filter excludes
-      } else if (config?.excludes?.length) {
-        if (config.excludes.some((exclude) => type === exclude)) {
-          shouldProcess = false;
-        }
-      }
-
-      if (shouldProcess) {
-        try {
-          const item = this.buildUserRatings(el);
+      try {
+        const type = getUserRatingType(el);
+
+        let shouldProcess = true;
+
+        // Filtering includesOnly
+        if (config?.includesOnly?.length) {
+          if (!config.includesOnly.some((include) => type === include)) {
+            shouldProcess = false;
+          }
+          // Filter excludes
+        } else if (config?.excludes?.length) {
+          if (config.excludes.some((exclude) => type === exclude)) {
+            shouldProcess = false;
+          }
+        }
+
+        if (shouldProcess) {
+          const item = this.buildUserRatings(el, type);
           const result = safeParse(CSFDUserRatingsSchema, item);
           if (result.success) {
             films.push(result.output as CSFDUserRatings);
           } else {
             console.warn(
               `Skipping invalid user rating. Title: ${item.title}, ID: ${item.id}`,
               JSON.stringify(flatten(result.issues))
             );
           }
-        } catch (e) {
-          console.warn(
-            `Skipping user rating due to scraping error (DOM change?): ${(e as Error).message}`
-          );
         }
+      } catch (e) {
+        console.warn(
+          `Skipping user rating due to scraping error (DOM change?): ${(e as Error).message}`
+        );
       }
     }

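And update buildUserRatings to accept and reuse the already-extracted type. Type the new parameter as whatever getUserRatingType returns (presumably CSFDFilmTypes, which would then need importing in this file; please confirm) rather than a bare string, so it stays assignable to the type field of CSFDUserRatings:

-  private buildUserRatings(el: HTMLElement): CSFDUserRatings {
+  private buildUserRatings(el: HTMLElement, type: CSFDFilmTypes): CSFDUserRatings {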
     return {
       id: getUserRatingId(el),
       title: getUserRatingTitle(el),
       year: getUserRatingYear(el),
-      type: getUserRatingType(el),
+      type,
       url: getUserRatingUrl(el),
       colorRating: getUserRatingColorRating(el) as CSFDColorRating,
       userDate: getUserRatingDate(el),
       userRating: getUserRating(el) as CSFDStars
     };
   }
🤖 Prompt for AI Agents
In `@src/services/user-ratings.service.ts` around lines 78 - 112,
getUserRatingType(el) is called outside the try/catch and can throw, aborting
the loop; move the extraction into the try block so DOM errors are caught,
capture the returned type and pass it into buildUserRatings to avoid
re-extraction, and update buildUserRatings' signature to accept the precomputed
type (then use that value instead of calling getUserRatingType again); keep the
existing validation flow with safeParse(CSFDUserRatingsSchema) and the existing
warning logs.

}
return films;
Expand Down
34 changes: 27 additions & 7 deletions src/services/user-reviews.service.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { HTMLElement, parse } from 'node-html-parser';
import { flatten, safeParse } from 'valibot';
import { CSFDColorRating, CSFDStars } from '../dto/global';
import { CSFDUserReviews, CSFDUserReviewsConfig } from '../dto/user-reviews';
import { CSFDUserReviewsSchema } from '../dto/user-reviews.schema';
import { fetchPage } from '../fetchers';
import { sleep } from '../helpers/global.helper';
import {
Expand Down Expand Up @@ -78,19 +80,37 @@ export class UserReviewsScraper {
for (const el of reviews) {
const type = getUserReviewType(el);

let shouldProcess = true;

// Filtering includesOnly
if (config?.includesOnly?.length) {
if (config.includesOnly.some((include) => type === include)) {
films.push(this.buildUserReviews(el));
if (!config.includesOnly.some((include) => type === include)) {
shouldProcess = false;
}
// Filter excludes
} else if (config?.excludes?.length) {
if (!config.excludes.some((exclude) => type === exclude)) {
films.push(this.buildUserReviews(el));
if (config.excludes.some((exclude) => type === exclude)) {
shouldProcess = false;
}
}

if (shouldProcess) {
try {
const item = this.buildUserReviews(el);
const result = safeParse(CSFDUserReviewsSchema, item);
if (result.success) {
films.push(result.output as CSFDUserReviews);
} else {
console.warn(
`Skipping invalid user review. Title: ${item.title}, ID: ${item.id}`,
JSON.stringify(flatten(result.issues))
);
}
} catch (e) {
console.warn(
`Skipping user review due to scraping error (DOM change?): ${(e as Error).message}`
);
}
} else {
// Without filtering
films.push(this.buildUserReviews(el));
}
}
return films;
Expand Down
5 changes: 5 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3156,6 +3156,11 @@ uri-js@^4.2.2:
dependencies:
punycode "^2.1.0"

valibot@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/valibot/-/valibot-1.2.0.tgz#8fc720d9e4082ba16e30a914064a39619b2f1d6f"
integrity sha512-mm1rxUsmOxzrwnX5arGS+U4T25RdvpPjPN4yR0u9pUBov9+zGVtO84tif1eY4r6zWxVxu3KzIyknJy3rxfRZZg==

vary@^1.1.2:
version "1.1.2"
resolved "https://registry.yarnpkg.com/vary/-/vary-1.1.2.tgz#2299f02c6ded30d4a5961b0b9f74524a18f634fc"
Expand Down