From d96205517e40af5a769d7b5f9d3a1ade8670dcc3 Mon Sep 17 00:00:00 2001 From: Gil Nobrega <82336674+gilnobrega@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:04:57 +0900 Subject: [PATCH 1/7] Create ExpireAfterCachingStrategy --- package.json | 4 ++++ src/data/repositories/RobotsDataRepository.ts | 16 ++++++++++++-- src/domain/models/CachedRobot.ts | 4 ++++ src/domain/models/CachingPolicy.ts | 15 +++++++++++++ src/domain/models/CachingPolicyType.ts | 13 ++++++++++++ src/domain/models/RobotsPluginOptions.ts | 6 ++++++ .../strategies/CachingStrategyFactory.ts | 18 ++++++++++++++++ .../strategies/ExpireAfterCachingStrategy.ts | 21 +++++++++++++++++++ src/domain/strategies/ICachingStrategy.ts | 13 ++++++++++++ .../strategies/IndefiniteCachingStrategy.ts | 8 +++++++ src/index.ts | 6 ++++++ src/interceptor.ts | 2 +- 12 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 src/domain/models/CachingPolicy.ts create mode 100644 src/domain/models/CachingPolicyType.ts create mode 100644 src/domain/strategies/CachingStrategyFactory.ts create mode 100644 src/domain/strategies/ExpireAfterCachingStrategy.ts create mode 100644 src/domain/strategies/ICachingStrategy.ts create mode 100644 src/domain/strategies/IndefiniteCachingStrategy.ts diff --git a/package.json b/package.json index 715fd8a..0728ed0 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "homepage": "https://github.com/hyperfluid-tech/axios-robots#readme", "devDependencies": { "@types/jest": "^30.0.0", + "@types/ms": "^2.1.0", "@types/node": "^25.0.6", "axios": "^1.13.2", "jest": "^30.2.0", @@ -42,5 +43,8 @@ "robots-parser": "^3.0.1", "ts-jest": "^29.4.6", "typescript": "^5.9.3" + }, + "dependencies": { + "ms": "^2.1.3" } } diff --git a/src/data/repositories/RobotsDataRepository.ts b/src/data/repositories/RobotsDataRepository.ts index 4bd0ac7..9a4e68a 100644 --- a/src/data/repositories/RobotsDataRepository.ts +++ b/src/data/repositories/RobotsDataRepository.ts @@ -4,19 +4,31 @@ import { HEADER_USER_AGENT, ROBOTS_TXT_FILENAME, ALLOW_ALL_ROBOTS_TXT_CONTENT } import { RobotsUnreachableError } from '../../errors/RobotsUnreachableError'; import { IRobotsDataRepository } from '../../domain/interfaces/IRobotsDataRepository'; import { CachedRobot } from '../../domain/models/CachedRobot'; +import { RobotsPluginOptions } from '../../domain/models/RobotsPluginOptions'; +import { CachingPolicy } from '../../domain/models/CachingPolicy'; +import { CachingStrategyFactory } from '../../domain/strategies/CachingStrategyFactory'; +import { CachingPolicyType } from '../../domain/models/CachingPolicyType'; export class RobotsDataRepository implements IRobotsDataRepository { private cache: Map = new Map(); + private cachingPolicy: CachingPolicy; + private strategyFactory: CachingStrategyFactory; + + constructor(options?: RobotsPluginOptions) { + this.cachingPolicy = options?.cachingPolicy ?? { type: CachingPolicyType.Indefinite }; + this.strategyFactory = new CachingStrategyFactory(); + } async getRobot(url: string, userAgent: string = '*'): Promise { const origin = new URL(url).origin; let cached = this.cache.get(origin); - if (cached) + if (cached && this.strategyFactory.getStrategy(this.cachingPolicy).isValid(cached)) { return cached; + } const robot = await this.fetchRobotsTxt(origin, userAgent); - cached = { robot }; + cached = { robot, fetchedAt: Date.now() }; this.cache.set(origin, cached); return cached; diff --git a/src/domain/models/CachedRobot.ts b/src/domain/models/CachedRobot.ts index cf1dc81..982d40e 100644 --- a/src/domain/models/CachedRobot.ts +++ b/src/domain/models/CachedRobot.ts @@ -9,4 +9,8 @@ export interface CachedRobot { * Timestamp of the last crawl for this domain. */ lastCrawled?: number; + /** + * Timestamp of when the robots.txt was fetched. + */ + fetchedAt: number; } diff --git a/src/domain/models/CachingPolicy.ts b/src/domain/models/CachingPolicy.ts new file mode 100644 index 0000000..ffcf9d7 --- /dev/null +++ b/src/domain/models/CachingPolicy.ts @@ -0,0 +1,15 @@ +import { CachingPolicyType } from './CachingPolicyType'; + +export type CachingPolicy = IndefiniteCachingPolicy | ExpireAfterCachingPolicy; + +export interface IndefiniteCachingPolicy { + type: CachingPolicyType.Indefinite; +} + +export interface ExpireAfterCachingPolicy { + type: CachingPolicyType.ExpireAfter; + /** + * Duration in milliseconds or a string format supported by the 'ms' library (e.g., '1h', '5m'). + */ + duration: string | number; +} diff --git a/src/domain/models/CachingPolicyType.ts b/src/domain/models/CachingPolicyType.ts new file mode 100644 index 0000000..0eb5bd5 --- /dev/null +++ b/src/domain/models/CachingPolicyType.ts @@ -0,0 +1,13 @@ +/** + * Types of caching policies for robots.txt data. + */ +export enum CachingPolicyType { + /** + * Cache robots.txt data indefinitely. + */ + Indefinite = 'indefinitely', + /** + * Cache robots.txt data for a specific duration. + */ + ExpireAfter = 'expireAfter' +} diff --git a/src/domain/models/RobotsPluginOptions.ts b/src/domain/models/RobotsPluginOptions.ts index 7522d11..0432d5f 100644 --- a/src/domain/models/RobotsPluginOptions.ts +++ b/src/domain/models/RobotsPluginOptions.ts @@ -1,4 +1,5 @@ import { CrawlDelayComplianceMode } from './CrawlDelayComplianceMode'; +import { CachingPolicy } from './CachingPolicy'; export interface RobotsPluginOptions { /** @@ -10,4 +11,9 @@ export interface RobotsPluginOptions { * Defaults to CrawlDelayComplianceMode.Await */ crawlDelayCompliance?: CrawlDelayComplianceMode; + /** + * How to handle caching of robots.txt data. + * Defaults to indefinitely. + */ + cachingPolicy?: CachingPolicy; } diff --git a/src/domain/strategies/CachingStrategyFactory.ts b/src/domain/strategies/CachingStrategyFactory.ts new file mode 100644 index 0000000..2ff79b5 --- /dev/null +++ b/src/domain/strategies/CachingStrategyFactory.ts @@ -0,0 +1,18 @@ +import { CachingPolicy } from '../models/CachingPolicy'; +import { CachingPolicyType } from '../models/CachingPolicyType'; +import { ICachingStrategy } from './ICachingStrategy'; +import { IndefiniteCachingStrategy } from './IndefiniteCachingStrategy'; +import { ExpireAfterCachingStrategy } from './ExpireAfterCachingStrategy'; + +export class CachingStrategyFactory { + getStrategy(policy: CachingPolicy): ICachingStrategy { + switch (policy.type) { + case CachingPolicyType.Indefinite: + return new IndefiniteCachingStrategy(); + case CachingPolicyType.ExpireAfter: + return new ExpireAfterCachingStrategy(policy.duration); + default: + return new IndefiniteCachingStrategy(); + } + } +} diff --git a/src/domain/strategies/ExpireAfterCachingStrategy.ts b/src/domain/strategies/ExpireAfterCachingStrategy.ts new file mode 100644 index 0000000..44d122f --- /dev/null +++ b/src/domain/strategies/ExpireAfterCachingStrategy.ts @@ -0,0 +1,21 @@ +import ms from 'ms'; +import { ICachingStrategy } from './ICachingStrategy'; +import { CachedRobot } from '../models/CachedRobot'; + +export class ExpireAfterCachingStrategy implements ICachingStrategy { + private durationMs: number; + + constructor(duration: string | number) { + if (typeof duration === 'string') { + this.durationMs = ms(duration as any) as unknown as number; + return; + } + this.durationMs = duration; + } + + isValid(cached: CachedRobot): boolean { + const now = Date.now(); + const expirationTime = cached.fetchedAt + this.durationMs; + return now < expirationTime; + } +} diff --git a/src/domain/strategies/ICachingStrategy.ts b/src/domain/strategies/ICachingStrategy.ts new file mode 100644 index 0000000..812296f --- /dev/null +++ b/src/domain/strategies/ICachingStrategy.ts @@ -0,0 +1,13 @@ +import { CachedRobot } from '../models/CachedRobot'; + +/** + * Strategy interface for validating cached robots.txt data. + */ +export interface ICachingStrategy { + /** + * Determines whether the cached robot data is still valid. + * @param cached The cached robot data to validate. + * @returns True if the cache is valid, false if specific data should be refreshed. + */ + isValid(cached: CachedRobot): boolean; +} diff --git a/src/domain/strategies/IndefiniteCachingStrategy.ts b/src/domain/strategies/IndefiniteCachingStrategy.ts new file mode 100644 index 0000000..c165a2b --- /dev/null +++ b/src/domain/strategies/IndefiniteCachingStrategy.ts @@ -0,0 +1,8 @@ +import { ICachingStrategy } from './ICachingStrategy'; +import { CachedRobot } from '../models/CachedRobot'; + +export class IndefiniteCachingStrategy implements ICachingStrategy { + isValid(cached: CachedRobot): boolean { + return true; + } +} diff --git a/src/index.ts b/src/index.ts index 004ed75..d84e058 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,6 +14,12 @@ export * from './domain/interfaces/IRobotsDataRepository'; export * from './domain/interfaces/IAllowService'; export * from './domain/interfaces/ICrawlDelayService'; +export * from './domain/models/CachingPolicy'; +export * from './domain/models/CachingPolicyType'; +export * from './domain/strategies/ICachingStrategy'; +export * from './domain/strategies/IndefiniteCachingStrategy'; +export * from './domain/strategies/ExpireAfterCachingStrategy'; +export * from './domain/strategies/CachingStrategyFactory'; /** * Apply the robots exclusion protocol interceptor to an Axios instance. * @param axiosInstance The axios instance to apply the interceptor to diff --git a/src/interceptor.ts b/src/interceptor.ts index f6b09e8..630b8e3 100644 --- a/src/interceptor.ts +++ b/src/interceptor.ts @@ -30,7 +30,7 @@ export class RobotsInterceptor { this.userAgent = options.userAgent; this.crawlDelayCompliance = options.crawlDelayCompliance ?? CrawlDelayComplianceMode.Await; - this.dataService = deps?.dataService ?? new RobotsDataRepository(); + this.dataService = deps?.dataService ?? new RobotsDataRepository(options); this.allowService = deps?.allowService ?? new AllowService(this.dataService); this.crawlDelayService = deps?.crawlDelayService ?? new CrawlDelayService(this.dataService); } From 5ad7289dfed40ca10d4824aef1ba205a1d583057 Mon Sep 17 00:00:00 2001 From: Gil Nobrega <82336674+gilnobrega@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:06:39 +0900 Subject: [PATCH 2/7] create unit tests --- .../repositories/RobotsDataRepository.test.ts | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 tests/unit/data/repositories/RobotsDataRepository.test.ts diff --git a/tests/unit/data/repositories/RobotsDataRepository.test.ts b/tests/unit/data/repositories/RobotsDataRepository.test.ts new file mode 100644 index 0000000..e47a9db --- /dev/null +++ b/tests/unit/data/repositories/RobotsDataRepository.test.ts @@ -0,0 +1,99 @@ +import { RobotsDataRepository } from '../../../../src/data/repositories/RobotsDataRepository'; +import { RobotsPluginOptions } from '../../../../src/domain/models/RobotsPluginOptions'; +import { CachingPolicyType } from '../../../../src/domain/models/CachingPolicyType'; +import axios from 'axios'; +import robotsParser from 'robots-parser'; + +jest.mock('axios'); +jest.mock('robots-parser'); + +const mockAxios = axios as unknown as jest.Mocked; +const mockRobotsParser = robotsParser as unknown as jest.MockedFunction; + +describe('RobotsDataRepository', () => { + let repository: RobotsDataRepository; + const origin = 'https://example.com'; + const userAgent = 'test-bot'; + + beforeEach(() => { + jest.clearAllMocks(); + mockAxios.create.mockReturnValue({ + get: jest.fn().mockResolvedValue({ data: 'User-agent: *\nDisallow: /' }) + } as any); + mockRobotsParser.mockReturnValue({ + isAllowed: jest.fn(), + isDisallowed: jest.fn(), + getMatchingLineNumber: jest.fn(), + getCrawlDelay: jest.fn(), + getSitemaps: jest.fn(), + getPreferredHost: jest.fn(), + }); + }); + + describe('Caching Behavior', () => { + test(` + GIVEN an indefinite (default) caching policy + WHEN robots.txt is requested twice with a long time gap + THEN it should only fetch from the network once + `, async () => { + repository = new RobotsDataRepository({ userAgent } as RobotsPluginOptions); + const future = Date.now() + 100 * 24 * 60 * 60 * 1000; + + await repository.getRobot(origin, userAgent); + jest.spyOn(Date, 'now').mockReturnValue(future); + await repository.getRobot(origin, userAgent); + + expect(mockAxios.create).toHaveBeenCalledTimes(1); + }); + + test(` + GIVEN an expireAfter caching policy + WHEN robots.txt is requested after the expiration duration + THEN it should fetch from the network again + `, async () => { + const duration = '5m'; + const durationMs = 5 * 60 * 1000; + repository = new RobotsDataRepository({ + userAgent, + cachingPolicy: { + type: CachingPolicyType.ExpireAfter, + duration + } + } as RobotsPluginOptions); + const initialTime = 1000; + const expiredTime = initialTime + durationMs + 1; + jest.spyOn(Date, 'now').mockReturnValue(initialTime); + + await repository.getRobot(origin, userAgent); + jest.spyOn(Date, 'now').mockReturnValue(expiredTime); + await repository.getRobot(origin, userAgent); + + expect(mockAxios.create).toHaveBeenCalledTimes(2); + }); + + test(` + GIVEN an expireAfter caching policy + WHEN robots.txt is requested before the expiration duration + THEN it should return the cached data without refetching + `, async () => { + const duration = 5 * 60 * 1000; + const durationMs = duration; + repository = new RobotsDataRepository({ + userAgent, + cachingPolicy: { + type: CachingPolicyType.ExpireAfter, + duration + } + } as RobotsPluginOptions); + const initialTime = 1000; + const validTime = initialTime + durationMs - 1; + jest.spyOn(Date, 'now').mockReturnValue(initialTime); + + await repository.getRobot(origin, userAgent); + jest.spyOn(Date, 'now').mockReturnValue(validTime); + await repository.getRobot(origin, userAgent); + + expect(mockAxios.create).toHaveBeenCalledTimes(1); + }); + }); +}); From cc0dc7c4566284d03c4a2b46c5a1e246d465b08e Mon Sep 17 00:00:00 2001 From: Gil Nobrega <82336674+gilnobrega@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:08:49 +0900 Subject: [PATCH 3/7] create integration tests --- tests/integration/caching.test.ts | 115 ++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 tests/integration/caching.test.ts diff --git a/tests/integration/caching.test.ts b/tests/integration/caching.test.ts new file mode 100644 index 0000000..db22ff5 --- /dev/null +++ b/tests/integration/caching.test.ts @@ -0,0 +1,115 @@ +import axios from 'axios'; +import nock from 'nock'; +import { applyRobotsInterceptor } from '../../src/index'; +import { CachingPolicyType } from '../../src/domain/models/CachingPolicyType'; + +describe('Caching Policy Integration', () => { + let client: ReturnType; + const USER_AGENT = 'TestBot/1.0'; + const DOMAIN = 'https://example.com'; + + beforeEach(() => { + nock.cleanAll(); + jest.clearAllMocks(); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + test(` + GIVEN an indefinite caching policy (default) + WHEN multiple requests are made over a long period + THEN robots.txt should only be fetched once + `, async () => { + const initialTime = 1672531200000; // 2023-01-01 + jest.spyOn(Date, 'now').mockReturnValue(initialTime); + + client = axios.create(); + applyRobotsInterceptor(client, { userAgent: USER_AGENT }); + + const robotsScope = nock(DOMAIN) + .get('/robots.txt') + .times(1) + .reply(200, `User-agent: *\nAllow: /`); + + nock(DOMAIN).get('/first').reply(200, 'OK'); + nock(DOMAIN).get('/second').reply(200, 'OK'); + + await client.get(`${DOMAIN}/first`); + + jest.spyOn(Date, 'now').mockReturnValue(initialTime + 1000 * 60 * 60 * 24 * 100); + + await client.get(`${DOMAIN}/second`); + + expect(robotsScope.isDone()).toBe(true); + }); + + test(` + GIVEN an expireAfter caching policy of 5 minutes + WHEN a second request is made after 6 minutes + THEN robots.txt should be fetched again + `, async () => { + const initialTime = 1672531200000; + jest.spyOn(Date, 'now').mockReturnValue(initialTime); + + client = axios.create(); + applyRobotsInterceptor(client, { + userAgent: USER_AGENT, + cachingPolicy: { + type: CachingPolicyType.ExpireAfter, + duration: '5m' + } + }); + + const robotsScope = nock(DOMAIN) + .get('/robots.txt') + .times(2) + .reply(200, `User-agent: *\nAllow: /`); + + nock(DOMAIN).get('/first').reply(200, 'OK'); + nock(DOMAIN).get('/second').reply(200, 'OK'); + + await client.get(`${DOMAIN}/first`); + + jest.spyOn(Date, 'now').mockReturnValue(initialTime + 1000 * 60 * 6); + + await client.get(`${DOMAIN}/second`); + + expect(robotsScope.isDone()).toBe(true); + }); + + test(` + GIVEN an expireAfter caching policy of 5 minutes + WHEN a second request is made within 4 minutes + THEN robots.txt should NOT be fetched again + `, async () => { + const initialTime = 1672531200000; + jest.spyOn(Date, 'now').mockReturnValue(initialTime); + + client = axios.create(); + applyRobotsInterceptor(client, { + userAgent: USER_AGENT, + cachingPolicy: { + type: CachingPolicyType.ExpireAfter, + duration: '5m' + } + }); + + const robotsScope = nock(DOMAIN) + .get('/robots.txt') + .times(1) + .reply(200, `User-agent: *\nAllow: /`); + + nock(DOMAIN).get('/first').reply(200, 'OK'); + nock(DOMAIN).get('/second').reply(200, 'OK'); + + await client.get(`${DOMAIN}/first`); + + jest.spyOn(Date, 'now').mockReturnValue(initialTime + 1000 * 60 * 4); + + await client.get(`${DOMAIN}/second`); + + expect(robotsScope.isDone()).toBe(true); + }); +}); From a27a198988ee6e4a599d81a01ff5ba726d734344 Mon Sep 17 00:00:00 2001 From: Gil Nobrega <82336674+gilnobrega@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:10:45 +0900 Subject: [PATCH 4/7] add documentation --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e44666..24dd5cd 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ Attaches the interceptor to the provided Axios instance. interface RobotsPluginOptions { userAgent: string; crawlDelayCompliance?: CrawlDelayComplianceMode; // default: CrawlDelayComplianceMode.Await + cachingPolicy?: CachingPolicy; // default: Indefinite (caches forever) } enum CrawlDelayComplianceMode { @@ -92,6 +93,25 @@ enum CrawlDelayComplianceMode { } ``` +### `CachingPolicy` + +You can configure how long `robots.txt` is cached. + +```typescript +import { CachingPolicyType } from 'axios-robots'; + +// Option 1: Indefinite Caching (Default) +const indefinite = { + type: CachingPolicyType.Indefinite +}; + +// Option 2: Time-based Expiration +const timeBased = { + type: CachingPolicyType.ExpireAfter, + duration: '1h' // Supports strings ('5m', '1d', '200ms') or numbers (milliseconds) +}; +``` + ### Error Handling The interceptor throws a `RobotsError` in the following cases: @@ -120,10 +140,10 @@ The interceptor throws a `RobotsError` in the following cases: - [x] **Standard Directives**: Supports `User-agent`, `Allow`, and `Disallow`. - [x] **Wildcards**: Supports standard path matching including `*` and `$`. - [x] **Crawl-delay**: The interceptor enforces `Crawl-delay` directives (automatic throttling) if configured. +- [x] **Cache TTL**: Flexible caching policies (indefinite or expiration-based). ### 🚧 Roadmap - [ ] **Sitemap**: Does not currently expose or parse `Sitemap` directives for the consumer. -- [ ] **Cache TTL**: Caching is currently indefinite for the lifecycle of the Axios instance. ## Contributing From 65d08b16b29f60eaebae14c246ac7598d920e8a6 Mon Sep 17 00:00:00 2001 From: Gil Nobrega <82336674+gilnobrega@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:11:05 +0900 Subject: [PATCH 5/7] bump version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 0728ed0..1de8edf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "axios-robots", - "version": "0.2.0", + "version": "0.3.0", "description": "A lightweight Axios interceptor that enforces robots.txt compliance for web scrapers and bots", "main": "dist/index.js", "types": "dist/index.d.ts", From 7dc4c7c35787d43b4990ae7225ba664b7435dcca Mon Sep 17 00:00:00 2001 From: Gil Nobrega <82336674+gilnobrega@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:19:59 +0900 Subject: [PATCH 6/7] organise files --- src/data/repositories/RobotsDataRepository.ts | 2 +- src/domain/services/CrawlDelayService.ts | 2 +- .../{ => caching}/CachingStrategyFactory.ts | 4 ++-- .../{ => caching}/ExpireAfterCachingStrategy.ts | 2 +- .../strategies/{ => caching}/ICachingStrategy.ts | 2 +- .../{ => caching}/IndefiniteCachingStrategy.ts | 2 +- .../{ => crawl-delay}/AwaitCrawlDelayStrategy.ts | 2 +- .../{ => crawl-delay}/CrawlDelayStrategyFactory.ts | 4 ++-- .../{ => crawl-delay}/FailureCrawlDelayStrategy.ts | 4 ++-- .../{ => crawl-delay}/ICrawlDelayStrategy.ts | 0 .../{ => crawl-delay}/IgnoreCrawlDelayStrategy.ts | 0 src/index.ts | 8 ++++---- .../AwaitCrawlDelayStrategy.test.ts | 4 ++-- .../CrawlDelayStrategyFactory.test.ts | 12 ++++++------ .../FailureCrawlDelayStrategy.test.ts | 6 +++--- .../IgnoreCrawlDelayStrategy.test.ts | 2 +- 16 files changed, 28 insertions(+), 28 deletions(-) rename src/domain/strategies/{ => caching}/CachingStrategyFactory.ts (83%) rename src/domain/strategies/{ => caching}/ExpireAfterCachingStrategy.ts (91%) rename src/domain/strategies/{ => caching}/ICachingStrategy.ts (87%) rename src/domain/strategies/{ => caching}/IndefiniteCachingStrategy.ts (77%) rename src/domain/strategies/{ => crawl-delay}/AwaitCrawlDelayStrategy.ts (85%) rename src/domain/strategies/{ => crawl-delay}/CrawlDelayStrategyFactory.ts (84%) rename src/domain/strategies/{ => crawl-delay}/FailureCrawlDelayStrategy.ts (75%) rename src/domain/strategies/{ => crawl-delay}/ICrawlDelayStrategy.ts (100%) rename src/domain/strategies/{ => crawl-delay}/IgnoreCrawlDelayStrategy.ts (100%) rename tests/unit/domain/strategies/{ => crawl-delay}/AwaitCrawlDelayStrategy.test.ts (86%) rename tests/unit/domain/strategies/{ => crawl-delay}/CrawlDelayStrategyFactory.test.ts (63%) rename tests/unit/domain/strategies/{ => crawl-delay}/FailureCrawlDelayStrategy.test.ts (76%) rename tests/unit/domain/strategies/{ => crawl-delay}/IgnoreCrawlDelayStrategy.test.ts (77%) diff --git a/src/data/repositories/RobotsDataRepository.ts b/src/data/repositories/RobotsDataRepository.ts index 9a4e68a..8dfca54 100644 --- a/src/data/repositories/RobotsDataRepository.ts +++ b/src/data/repositories/RobotsDataRepository.ts @@ -6,7 +6,7 @@ import { IRobotsDataRepository } from '../../domain/interfaces/IRobotsDataReposi import { CachedRobot } from '../../domain/models/CachedRobot'; import { RobotsPluginOptions } from '../../domain/models/RobotsPluginOptions'; import { CachingPolicy } from '../../domain/models/CachingPolicy'; -import { CachingStrategyFactory } from '../../domain/strategies/CachingStrategyFactory'; +import { CachingStrategyFactory } from '../../domain/strategies/caching/CachingStrategyFactory'; import { CachingPolicyType } from '../../domain/models/CachingPolicyType'; export class RobotsDataRepository implements IRobotsDataRepository { diff --git a/src/domain/services/CrawlDelayService.ts b/src/domain/services/CrawlDelayService.ts index 3753912..7e7c511 100644 --- a/src/domain/services/CrawlDelayService.ts +++ b/src/domain/services/CrawlDelayService.ts @@ -2,7 +2,7 @@ import { CrawlDelayComplianceMode } from '../models/CrawlDelayComplianceMode'; import { ICrawlDelayService } from '../interfaces/ICrawlDelayService'; import { IRobotsDataRepository } from '../interfaces/IRobotsDataRepository'; import { CalculateWaitTimeUseCase } from '../usecases/CalculateWaitTimeUseCase'; -import { CrawlDelayStrategyFactory } from '../strategies/CrawlDelayStrategyFactory'; +import { CrawlDelayStrategyFactory } from '../strategies/crawl-delay/CrawlDelayStrategyFactory'; export class CrawlDelayService implements ICrawlDelayService { private calculateWaitTimeUseCase: CalculateWaitTimeUseCase; diff --git a/src/domain/strategies/CachingStrategyFactory.ts b/src/domain/strategies/caching/CachingStrategyFactory.ts similarity index 83% rename from src/domain/strategies/CachingStrategyFactory.ts rename to src/domain/strategies/caching/CachingStrategyFactory.ts index 2ff79b5..745c6a3 100644 --- a/src/domain/strategies/CachingStrategyFactory.ts +++ b/src/domain/strategies/caching/CachingStrategyFactory.ts @@ -1,5 +1,5 @@ -import { CachingPolicy } from '../models/CachingPolicy'; -import { CachingPolicyType } from '../models/CachingPolicyType'; +import { CachingPolicy } from '../../models/CachingPolicy'; +import { CachingPolicyType } from '../../models/CachingPolicyType'; import { ICachingStrategy } from './ICachingStrategy'; import { IndefiniteCachingStrategy } from './IndefiniteCachingStrategy'; import { ExpireAfterCachingStrategy } from './ExpireAfterCachingStrategy'; diff --git a/src/domain/strategies/ExpireAfterCachingStrategy.ts b/src/domain/strategies/caching/ExpireAfterCachingStrategy.ts similarity index 91% rename from src/domain/strategies/ExpireAfterCachingStrategy.ts rename to src/domain/strategies/caching/ExpireAfterCachingStrategy.ts index 44d122f..8d806f4 100644 --- a/src/domain/strategies/ExpireAfterCachingStrategy.ts +++ b/src/domain/strategies/caching/ExpireAfterCachingStrategy.ts @@ -1,6 +1,6 @@ import ms from 'ms'; import { ICachingStrategy } from './ICachingStrategy'; -import { CachedRobot } from '../models/CachedRobot'; +import { CachedRobot } from '../../models/CachedRobot'; export class ExpireAfterCachingStrategy implements ICachingStrategy { private durationMs: number; diff --git a/src/domain/strategies/ICachingStrategy.ts b/src/domain/strategies/caching/ICachingStrategy.ts similarity index 87% rename from src/domain/strategies/ICachingStrategy.ts rename to src/domain/strategies/caching/ICachingStrategy.ts index 812296f..3bd5180 100644 --- a/src/domain/strategies/ICachingStrategy.ts +++ b/src/domain/strategies/caching/ICachingStrategy.ts @@ -1,4 +1,4 @@ -import { CachedRobot } from '../models/CachedRobot'; +import { CachedRobot } from '../../models/CachedRobot'; /** * Strategy interface for validating cached robots.txt data. diff --git a/src/domain/strategies/IndefiniteCachingStrategy.ts b/src/domain/strategies/caching/IndefiniteCachingStrategy.ts similarity index 77% rename from src/domain/strategies/IndefiniteCachingStrategy.ts rename to src/domain/strategies/caching/IndefiniteCachingStrategy.ts index c165a2b..9b3ab7f 100644 --- a/src/domain/strategies/IndefiniteCachingStrategy.ts +++ b/src/domain/strategies/caching/IndefiniteCachingStrategy.ts @@ -1,5 +1,5 @@ import { ICachingStrategy } from './ICachingStrategy'; -import { CachedRobot } from '../models/CachedRobot'; +import { CachedRobot } from '../../models/CachedRobot'; export class IndefiniteCachingStrategy implements ICachingStrategy { isValid(cached: CachedRobot): boolean { diff --git a/src/domain/strategies/AwaitCrawlDelayStrategy.ts b/src/domain/strategies/crawl-delay/AwaitCrawlDelayStrategy.ts similarity index 85% rename from src/domain/strategies/AwaitCrawlDelayStrategy.ts rename to src/domain/strategies/crawl-delay/AwaitCrawlDelayStrategy.ts index 5c27ca2..059eb55 100644 --- a/src/domain/strategies/AwaitCrawlDelayStrategy.ts +++ b/src/domain/strategies/crawl-delay/AwaitCrawlDelayStrategy.ts @@ -1,6 +1,6 @@ import { ICrawlDelayStrategy } from './ICrawlDelayStrategy'; -import { CalculateWaitTimeUseCase } from '../usecases/CalculateWaitTimeUseCase'; +import { CalculateWaitTimeUseCase } from '../../usecases/CalculateWaitTimeUseCase'; export class AwaitCrawlDelayStrategy implements ICrawlDelayStrategy { constructor(private calculateWaitTimeUseCase: CalculateWaitTimeUseCase) { } diff --git a/src/domain/strategies/CrawlDelayStrategyFactory.ts b/src/domain/strategies/crawl-delay/CrawlDelayStrategyFactory.ts similarity index 84% rename from src/domain/strategies/CrawlDelayStrategyFactory.ts rename to src/domain/strategies/crawl-delay/CrawlDelayStrategyFactory.ts index 0443d44..24dc599 100644 --- a/src/domain/strategies/CrawlDelayStrategyFactory.ts +++ b/src/domain/strategies/crawl-delay/CrawlDelayStrategyFactory.ts @@ -1,7 +1,7 @@ -import { CrawlDelayComplianceMode } from '../models/CrawlDelayComplianceMode'; +import { CrawlDelayComplianceMode } from '../../models/CrawlDelayComplianceMode'; import { ICrawlDelayStrategy } from './ICrawlDelayStrategy'; -import { CalculateWaitTimeUseCase } from '../usecases/CalculateWaitTimeUseCase'; +import { CalculateWaitTimeUseCase } from '../../usecases/CalculateWaitTimeUseCase'; import { AwaitCrawlDelayStrategy } from './AwaitCrawlDelayStrategy'; import { FailureCrawlDelayStrategy } from './FailureCrawlDelayStrategy'; import { IgnoreCrawlDelayStrategy } from './IgnoreCrawlDelayStrategy'; diff --git a/src/domain/strategies/FailureCrawlDelayStrategy.ts b/src/domain/strategies/crawl-delay/FailureCrawlDelayStrategy.ts similarity index 75% rename from src/domain/strategies/FailureCrawlDelayStrategy.ts rename to src/domain/strategies/crawl-delay/FailureCrawlDelayStrategy.ts index 5b6c25e..2a755b8 100644 --- a/src/domain/strategies/FailureCrawlDelayStrategy.ts +++ b/src/domain/strategies/crawl-delay/FailureCrawlDelayStrategy.ts @@ -1,7 +1,7 @@ import { ICrawlDelayStrategy } from './ICrawlDelayStrategy'; -import { CalculateWaitTimeUseCase } from '../usecases/CalculateWaitTimeUseCase'; -import { CrawlDelayError } from '../../errors/CrawlDelayError'; +import { CalculateWaitTimeUseCase } from '../../usecases/CalculateWaitTimeUseCase'; +import { CrawlDelayError } from '../../../errors/CrawlDelayError'; export class FailureCrawlDelayStrategy implements ICrawlDelayStrategy { constructor(private calculateWaitTimeUseCase: CalculateWaitTimeUseCase) { } diff --git a/src/domain/strategies/ICrawlDelayStrategy.ts b/src/domain/strategies/crawl-delay/ICrawlDelayStrategy.ts similarity index 100% rename from src/domain/strategies/ICrawlDelayStrategy.ts rename to src/domain/strategies/crawl-delay/ICrawlDelayStrategy.ts diff --git a/src/domain/strategies/IgnoreCrawlDelayStrategy.ts b/src/domain/strategies/crawl-delay/IgnoreCrawlDelayStrategy.ts similarity index 100% rename from src/domain/strategies/IgnoreCrawlDelayStrategy.ts rename to src/domain/strategies/crawl-delay/IgnoreCrawlDelayStrategy.ts diff --git a/src/index.ts b/src/index.ts index d84e058..e2fcad3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -16,10 +16,10 @@ export * from './domain/interfaces/ICrawlDelayService'; export * from './domain/models/CachingPolicy'; export * from './domain/models/CachingPolicyType'; -export * from './domain/strategies/ICachingStrategy'; -export * from './domain/strategies/IndefiniteCachingStrategy'; -export * from './domain/strategies/ExpireAfterCachingStrategy'; -export * from './domain/strategies/CachingStrategyFactory'; +export * from './domain/strategies/caching/ICachingStrategy'; +export * from './domain/strategies/caching/IndefiniteCachingStrategy'; +export * from './domain/strategies/caching/ExpireAfterCachingStrategy'; +export * from './domain/strategies/caching/CachingStrategyFactory'; /** * Apply the robots exclusion protocol interceptor to an Axios instance. * @param axiosInstance The axios instance to apply the interceptor to diff --git a/tests/unit/domain/strategies/AwaitCrawlDelayStrategy.test.ts b/tests/unit/domain/strategies/crawl-delay/AwaitCrawlDelayStrategy.test.ts similarity index 86% rename from tests/unit/domain/strategies/AwaitCrawlDelayStrategy.test.ts rename to tests/unit/domain/strategies/crawl-delay/AwaitCrawlDelayStrategy.test.ts index d8a8a7f..a38d48d 100644 --- a/tests/unit/domain/strategies/AwaitCrawlDelayStrategy.test.ts +++ b/tests/unit/domain/strategies/crawl-delay/AwaitCrawlDelayStrategy.test.ts @@ -1,5 +1,5 @@ -import { AwaitCrawlDelayStrategy } from '../../../../src/domain/strategies/AwaitCrawlDelayStrategy'; -import { CalculateWaitTimeUseCase } from '../../../../src/domain/usecases/CalculateWaitTimeUseCase'; +import { AwaitCrawlDelayStrategy } from "../../../../../src/domain/strategies/crawl-delay/AwaitCrawlDelayStrategy"; +import { CalculateWaitTimeUseCase } from "../../../../../src/domain/usecases/CalculateWaitTimeUseCase"; describe('AwaitCrawlDelayStrategy', () => { let strategy: AwaitCrawlDelayStrategy; diff --git a/tests/unit/domain/strategies/CrawlDelayStrategyFactory.test.ts b/tests/unit/domain/strategies/crawl-delay/CrawlDelayStrategyFactory.test.ts similarity index 63% rename from tests/unit/domain/strategies/CrawlDelayStrategyFactory.test.ts rename to tests/unit/domain/strategies/crawl-delay/CrawlDelayStrategyFactory.test.ts index 7d6602c..b5ae453 100644 --- a/tests/unit/domain/strategies/CrawlDelayStrategyFactory.test.ts +++ b/tests/unit/domain/strategies/crawl-delay/CrawlDelayStrategyFactory.test.ts @@ -1,9 +1,9 @@ -import { CrawlDelayStrategyFactory } from '../../../../src/domain/strategies/CrawlDelayStrategyFactory'; -import { AwaitCrawlDelayStrategy } from '../../../../src/domain/strategies/AwaitCrawlDelayStrategy'; -import { FailureCrawlDelayStrategy } from '../../../../src/domain/strategies/FailureCrawlDelayStrategy'; -import { IgnoreCrawlDelayStrategy } from '../../../../src/domain/strategies/IgnoreCrawlDelayStrategy'; -import { CrawlDelayComplianceMode } from '../../../../src/domain/models/CrawlDelayComplianceMode'; -import { CalculateWaitTimeUseCase } from '../../../../src/domain/usecases/CalculateWaitTimeUseCase'; +import { CrawlDelayComplianceMode } from "../../../../../src"; +import { AwaitCrawlDelayStrategy } from "../../../../../src/domain/strategies/crawl-delay/AwaitCrawlDelayStrategy"; +import { CrawlDelayStrategyFactory } from "../../../../../src/domain/strategies/crawl-delay/CrawlDelayStrategyFactory"; +import { FailureCrawlDelayStrategy } from "../../../../../src/domain/strategies/crawl-delay/FailureCrawlDelayStrategy"; +import { IgnoreCrawlDelayStrategy } from "../../../../../src/domain/strategies/crawl-delay/IgnoreCrawlDelayStrategy"; +import { CalculateWaitTimeUseCase } from "../../../../../src/domain/usecases/CalculateWaitTimeUseCase"; describe('CrawlDelayStrategyFactory', () => { let factory: CrawlDelayStrategyFactory; diff --git a/tests/unit/domain/strategies/FailureCrawlDelayStrategy.test.ts b/tests/unit/domain/strategies/crawl-delay/FailureCrawlDelayStrategy.test.ts similarity index 76% rename from tests/unit/domain/strategies/FailureCrawlDelayStrategy.test.ts rename to tests/unit/domain/strategies/crawl-delay/FailureCrawlDelayStrategy.test.ts index eddbbad..ac8991e 100644 --- a/tests/unit/domain/strategies/FailureCrawlDelayStrategy.test.ts +++ b/tests/unit/domain/strategies/crawl-delay/FailureCrawlDelayStrategy.test.ts @@ -1,6 +1,6 @@ -import { FailureCrawlDelayStrategy } from '../../../../src/domain/strategies/FailureCrawlDelayStrategy'; -import { CalculateWaitTimeUseCase } from '../../../../src/domain/usecases/CalculateWaitTimeUseCase'; -import { CrawlDelayError } from '../../../../src/errors/CrawlDelayError'; +import { FailureCrawlDelayStrategy } from '../../../../../src/domain/strategies/crawl-delay/FailureCrawlDelayStrategy'; +import { CalculateWaitTimeUseCase } from '../../../../../src/domain/usecases/CalculateWaitTimeUseCase'; +import { CrawlDelayError } from '../../../../../src/errors/CrawlDelayError'; describe('FailureCrawlDelayStrategy', () => { let strategy: FailureCrawlDelayStrategy; diff --git a/tests/unit/domain/strategies/IgnoreCrawlDelayStrategy.test.ts b/tests/unit/domain/strategies/crawl-delay/IgnoreCrawlDelayStrategy.test.ts similarity index 77% rename from tests/unit/domain/strategies/IgnoreCrawlDelayStrategy.test.ts rename to tests/unit/domain/strategies/crawl-delay/IgnoreCrawlDelayStrategy.test.ts index 351e4d5..b723b73 100644 --- a/tests/unit/domain/strategies/IgnoreCrawlDelayStrategy.test.ts +++ b/tests/unit/domain/strategies/crawl-delay/IgnoreCrawlDelayStrategy.test.ts @@ -1,4 +1,4 @@ -import { IgnoreCrawlDelayStrategy } from '../../../../src/domain/strategies/IgnoreCrawlDelayStrategy'; +import { IgnoreCrawlDelayStrategy } from "../../../../../src/domain/strategies/crawl-delay/IgnoreCrawlDelayStrategy"; describe('IgnoreCrawlDelayStrategy', () => { let strategy: IgnoreCrawlDelayStrategy; From 3eb9a09e9a6965c7f2a30a38d66f527cfc552fd1 Mon Sep 17 00:00:00 2001 From: Gil Nobrega <82336674+gilnobrega@users.noreply.github.com> Date: Tue, 20 Jan 2026 23:56:49 +0900 Subject: [PATCH 7/7] fix dependencies --- package.json | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 1de8edf..c02fb56 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,9 @@ "url": "https://github.com/hyperfluid-tech/axios-robots/issues" }, "homepage": "https://github.com/hyperfluid-tech/axios-robots#readme", + "peerDependencies": { + "axios": "^1.0.0" + }, "devDependencies": { "@types/jest": "^30.0.0", "@types/ms": "^2.1.0", @@ -40,11 +43,11 @@ "axios": "^1.13.2", "jest": "^30.2.0", "nock": "^14.0.10", - "robots-parser": "^3.0.1", "ts-jest": "^29.4.6", "typescript": "^5.9.3" }, "dependencies": { - "ms": "^2.1.3" + "ms": "^2.1.3", + "robots-parser": "^3.0.1" } }