Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Ensures your bot plays by the rules defined by website owners, preventing unauth
## Features

- **🚀 Automated Compliance**: Validates every request against `robots.txt` rules (cached per origin).
- **⏱️ Crawl-Delay**: Option to automatically wait before requests if `Crawl-delay` is specified.
- **🛡️ Strict Mode**: Blocks requests by default for invalid URLs, non-HTTP/S protocols, or unreachable `robots.txt` files (non-4xx errors).
- **✨ Clean Architecture**: built with maintainability and separation of concerns in mind.
- **🔌 Plug-and-Play**: easily attaches to any Axios instance.
Expand Down Expand Up @@ -43,7 +44,7 @@ const client = axios.create();

// Apply the interceptor
applyRobotsInterceptor(client, {
userAgent: 'MyCoolBot/1.0'
userAgent: 'MyCoolBot/1.0',
});

async function crawl() {
Expand Down Expand Up @@ -81,6 +82,13 @@ Attaches the interceptor to the provided Axios instance.
```typescript
interface RobotsPluginOptions {
userAgent: string;
crawlDelayCompliance?: CrawlDelayComplianceMode; // default: CrawlDelayComplianceMode.Await
}

enum CrawlDelayComplianceMode {
Await = 'await', // Respects delay by waiting
Ignore = 'ignore', // Ignores delay
Failure = 'failure' // Throws CrawlDelayError if delay is not met
}
```

Expand Down Expand Up @@ -111,9 +119,9 @@ The interceptor throws a `RobotsError` in the following cases:
- [x] **[RFC 9309](https://www.rfc-editor.org/rfc/rfc9309.html) Compliance**: Full support for the standard Robots Exclusion Protocol.
- [x] **Standard Directives**: Supports `User-agent`, `Allow`, and `Disallow`.
- [x] **Wildcards**: Supports standard path matching including `*` and `$`.
- [x] **Crawl-delay**: The interceptor enforces `Crawl-delay` directives (automatic throttling) by default; behavior is configurable via `crawlDelayCompliance`.

### 🚧 Missing / TODO
- [ ] **Crawl-delay**: The interceptor currently does **not** enforce `Crawl-delay` directives (automatic throttling).
- [ ] **Sitemap**: Does not currently expose or parse `Sitemap` directives for the consumer.
- [ ] **Cache TTL**: Caching is currently indefinite for the lifecycle of the Axios instance.

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "axios-robots",
"version": "0.1.0",
"version": "0.2.0",
"description": "A lightweight Axios interceptor that enforces robots.txt compliance for web scrapers and bots",
"main": "dist/index.js",
"types": "dist/index.d.ts",
Expand Down
57 changes: 57 additions & 0 deletions src/data/repositories/RobotsDataRepository.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import robotsParser, { Robot } from 'robots-parser';
import axios from 'axios';
import { HEADER_USER_AGENT, ROBOTS_TXT_FILENAME, ALLOW_ALL_ROBOTS_TXT_CONTENT } from '../../constants';
import { RobotsUnreachableError } from '../../errors/RobotsUnreachableError';
import { IRobotsDataRepository } from '../../domain/interfaces/IRobotsDataRepository';
import { CachedRobot } from '../../domain/models/CachedRobot';

export class RobotsDataRepository implements IRobotsDataRepository {
private cache: Map<string, CachedRobot> = new Map();

async getRobot(url: string, userAgent: string = '*'): Promise<CachedRobot> {
const origin = new URL(url).origin;
let cached = this.cache.get(origin);

if (cached)
return cached;

const robot = await this.fetchRobotsTxt(origin, userAgent);
cached = { robot };
this.cache.set(origin, cached);

return cached;
}

setLastCrawled(url: string, timestamp: number): void {
const origin = new URL(url).origin;
const cached = this.cache.get(origin);
if (cached) {
cached.lastCrawled = timestamp;
}
}

private async fetchRobotsTxt(origin: string, userAgent: string): Promise<Robot> {
const robotsUrl = `${origin}/${ROBOTS_TXT_FILENAME}`;

const internalClient = axios.create({
headers: {
[HEADER_USER_AGENT]: userAgent,
}
});

try {
const response = await internalClient.get(robotsUrl);
return robotsParser(robotsUrl, response.data);
} catch (error: any) {
if (this.isUnavailable(error)) {
return robotsParser(robotsUrl, ALLOW_ALL_ROBOTS_TXT_CONTENT);
}

throw new RobotsUnreachableError(error.message);
}
}

private isUnavailable(error: any): boolean {
return error.response && error.response.status >= 400 && error.response.status < 500;
}
}
52 changes: 0 additions & 52 deletions src/domain/RobotsService.ts

This file was deleted.

12 changes: 12 additions & 0 deletions src/domain/interfaces/IAllowService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/**
 * Service for checking if a URL is allowed to be crawled according to
 * robots.txt rules.
 */
export interface IAllowService {
  /**
   * Checks if the given URL is allowed for the specified user agent.
   * @param url The URL to check.
   * @param userAgent The user agent to check against; implementations default
   *        to the wildcard agent '*' when omitted.
   * @returns A promise resolving to true if allowed, false otherwise.
   */
  isAllowed(url: string, userAgent?: string): Promise<boolean>;
}
15 changes: 15 additions & 0 deletions src/domain/interfaces/ICrawlDelayService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { CrawlDelayComplianceMode } from '../models/CrawlDelayComplianceMode';

/**
 * Service for enforcing Crawl-delay directives from robots.txt before a
 * request is dispatched.
 */
export interface ICrawlDelayService {
  /**
   * Enforces the crawl delay for a given URL based on the compliance mode.
   * @param url The URL about to be requested.
   * @param userAgent The user agent whose Crawl-delay rule applies.
   * @param complianceMode How to comply: Await (sleep until the delay has
   *        elapsed), Ignore (proceed immediately), or Failure (reject early
   *        requests).
   * @returns A promise that resolves when it is safe to proceed.
   * @throws CrawlDelayError in Failure mode when the delay has not elapsed.
   */
  handleCrawlDelay(url: string, userAgent: string, complianceMode: CrawlDelayComplianceMode): Promise<void>;
}
21 changes: 21 additions & 0 deletions src/domain/interfaces/IRobotsDataRepository.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { CachedRobot } from '../models/CachedRobot';

/**
 * Repository for managing robots.txt data and crawl timestamps independently
 * of the protocol logic. Implementations cache per origin.
 */
export interface IRobotsDataRepository {
  /**
   * Retrieves the (possibly cached) robot instance for a given URL.
   * @param url The URL whose origin identifies the robots.txt to use.
   * @param userAgent Optional user agent sent when fetching robots.txt on a
   *        cache miss; has no effect when the origin is already cached.
   * @returns A promise resolving to the CachedRobot containing the parsed rules.
   */
  getRobot(url: string, userAgent?: string): Promise<CachedRobot>;

  /**
   * Updates the last crawled timestamp for the domain associated with the URL.
   * @param url The URL identifying the domain (origin).
   * @param timestamp The timestamp to record. NOTE(review): assumed to be
   *        epoch milliseconds — confirm against the caller that supplies it.
   */
  setLastCrawled(url: string, timestamp: number): void;
}
12 changes: 12 additions & 0 deletions src/domain/models/CachedRobot.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { Robot } from 'robots-parser';

export interface CachedRobot {
  /**
   * The parsed robots.txt rule set for one origin (robots-parser instance).
   */
  robot: Robot;
  /**
   * Timestamp of the last crawl for this domain, recorded via
   * IRobotsDataRepository.setLastCrawled; undefined until the first crawl.
   * NOTE(review): assumed to be epoch milliseconds — confirm with callers.
   */
  lastCrawled?: number;
}
17 changes: 17 additions & 0 deletions src/domain/models/CrawlDelayComplianceMode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
 * Determines how the interceptor reacts to a robots.txt Crawl-delay
 * directive whose window has not yet elapsed for the target domain.
 */
export enum CrawlDelayComplianceMode {
  /**
   * Respects the Crawl-delay directive by waiting before making the request.
   */
  Await = 'await',
  /**
   * Ignores the Crawl-delay directive entirely; requests proceed immediately.
   */
  Ignore = 'ignore',
  /**
   * Rejects with an error if the request would violate the Crawl-delay.
   */
  Failure = 'failure'
}
13 changes: 13 additions & 0 deletions src/domain/models/RobotsPluginOptions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { CrawlDelayComplianceMode } from './CrawlDelayComplianceMode';

export interface RobotsPluginOptions {
  /**
   * The User-Agent string used when checking robots.txt rules; it is also
   * sent as the User-Agent header when fetching robots.txt itself.
   */
  userAgent: string;
  /**
   * How to handle Crawl-delay directives before each request.
   * Defaults to CrawlDelayComplianceMode.Await (wait out the delay).
   */
  crawlDelayCompliance?: CrawlDelayComplianceMode;
}
16 changes: 16 additions & 0 deletions src/domain/services/AllowService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { IAllowService } from '../interfaces/IAllowService';
import { IRobotsDataRepository } from '../interfaces/IRobotsDataRepository';

export class AllowService implements IAllowService {
  constructor(private dataService: IRobotsDataRepository) { }

  /**
   * Resolves whether `url` may be crawled by `userAgent` according to the
   * cached robots.txt rules. Indeterminate answers default to permissive.
   */
  async isAllowed(url: string, userAgent: string = '*'): Promise<boolean> {
    const cached = await this.dataService.getRobot(url, userAgent);
    const parser = cached?.robot;

    // No parsed rules available -> nothing forbids the request.
    if (!parser) {
      return true;
    }

    // robots-parser yields undefined for URLs it cannot evaluate; treat an
    // indeterminate verdict as allowed.
    const verdict = parser.isAllowed(url, userAgent);
    return verdict ?? true;
  }
}
24 changes: 24 additions & 0 deletions src/domain/services/CrawlDelayService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { CrawlDelayComplianceMode } from '../models/CrawlDelayComplianceMode';
import { ICrawlDelayService } from '../interfaces/ICrawlDelayService';
import { IRobotsDataRepository } from '../interfaces/IRobotsDataRepository';
import { CalculateWaitTimeUseCase } from '../usecases/CalculateWaitTimeUseCase';
import { CrawlDelayStrategyFactory } from '../strategies/CrawlDelayStrategyFactory';

export class CrawlDelayService implements ICrawlDelayService {
  private readonly strategyFactory: CrawlDelayStrategyFactory;
  private readonly calculateWaitTimeUseCase: CalculateWaitTimeUseCase;

  /**
   * Wires the wait-time calculation and strategy selection on top of the
   * shared robots data repository.
   */
  constructor(private dataService: IRobotsDataRepository) {
    this.calculateWaitTimeUseCase = new CalculateWaitTimeUseCase(dataService);
    this.strategyFactory = new CrawlDelayStrategyFactory(this.calculateWaitTimeUseCase);
  }

  /**
   * Applies the Crawl-delay policy selected by `complianceMode` to the
   * upcoming request for `url`, delegating to the matching strategy.
   */
  async handleCrawlDelay(
    url: string,
    userAgent: string,
    complianceMode: CrawlDelayComplianceMode
  ): Promise<void> {
    await this.strategyFactory.getStrategy(complianceMode).execute(url, userAgent);
  }
}
17 changes: 17 additions & 0 deletions src/domain/strategies/AwaitCrawlDelayStrategy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

import { ICrawlDelayStrategy } from './ICrawlDelayStrategy';
import { CalculateWaitTimeUseCase } from '../usecases/CalculateWaitTimeUseCase';

export class AwaitCrawlDelayStrategy implements ICrawlDelayStrategy {
  constructor(private calculateWaitTimeUseCase: CalculateWaitTimeUseCase) { }

  /**
   * Sleeps for the remaining crawl-delay window before resolving; resolves
   * immediately when no wait is required.
   */
  async execute(url: string, userAgent: string): Promise<void> {
    const { waitTime } = await this.calculateWaitTimeUseCase.execute(url, userAgent);

    if (waitTime > 0) {
      await this.sleep(waitTime);
    }
  }

  /** Promise-based wrapper around setTimeout. */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
22 changes: 22 additions & 0 deletions src/domain/strategies/CrawlDelayStrategyFactory.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

import { CrawlDelayComplianceMode } from '../models/CrawlDelayComplianceMode';
import { ICrawlDelayStrategy } from './ICrawlDelayStrategy';
import { CalculateWaitTimeUseCase } from '../usecases/CalculateWaitTimeUseCase';
import { AwaitCrawlDelayStrategy } from './AwaitCrawlDelayStrategy';
import { FailureCrawlDelayStrategy } from './FailureCrawlDelayStrategy';
import { IgnoreCrawlDelayStrategy } from './IgnoreCrawlDelayStrategy';

export class CrawlDelayStrategyFactory {
constructor(private calculateWaitTimeUseCase: CalculateWaitTimeUseCase) { }

getStrategy(mode: CrawlDelayComplianceMode): ICrawlDelayStrategy {
switch (mode) {
case CrawlDelayComplianceMode.Failure:
return new FailureCrawlDelayStrategy(this.calculateWaitTimeUseCase);
case CrawlDelayComplianceMode.Ignore:
return new IgnoreCrawlDelayStrategy();
case CrawlDelayComplianceMode.Await:
return new AwaitCrawlDelayStrategy(this.calculateWaitTimeUseCase);
}
}
}
16 changes: 16 additions & 0 deletions src/domain/strategies/FailureCrawlDelayStrategy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

import { ICrawlDelayStrategy } from './ICrawlDelayStrategy';
import { CalculateWaitTimeUseCase } from '../usecases/CalculateWaitTimeUseCase';
import { CrawlDelayError } from '../../errors/CrawlDelayError';

export class FailureCrawlDelayStrategy implements ICrawlDelayStrategy {
  constructor(private calculateWaitTimeUseCase: CalculateWaitTimeUseCase) { }

  /**
   * Rejects with a CrawlDelayError when the request arrives before the
   * crawl-delay window has elapsed; resolves silently otherwise.
   */
  async execute(url: string, userAgent: string): Promise<void> {
    const { waitTime, delay } = await this.calculateWaitTimeUseCase.execute(url, userAgent);

    const windowElapsed = waitTime <= 0;
    if (!windowElapsed) {
      throw new CrawlDelayError(delay);
    }
  }
}
12 changes: 12 additions & 0 deletions src/domain/strategies/ICrawlDelayStrategy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

/**
* Strategy for ensuring compliance with Crawl-delay rules.
*/
export interface ICrawlDelayStrategy {
/**
* Executes the strategy for a given URL and user agent.
* @param url The URL about to be crawled.
* @param userAgent The user agent for which to check the rules.
*/
execute(url: string, userAgent: string): Promise<void>;
}
8 changes: 8 additions & 0 deletions src/domain/strategies/IgnoreCrawlDelayStrategy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

import { ICrawlDelayStrategy } from './ICrawlDelayStrategy';

export class IgnoreCrawlDelayStrategy implements ICrawlDelayStrategy {
  /**
   * Deliberate no-op: Ignore mode skips Crawl-delay enforcement entirely,
   * so the request may proceed immediately.
   */
  async execute(url: string, userAgent: string): Promise<void> {
    // Nothing to enforce in Ignore mode.
  }
}
Loading