Skip to content

Commit 2f6db18

Browse files
committed
refactor: Added WaitJob helper.
1 parent 5a62d9e commit 2f6db18

5 files changed

Lines changed: 67 additions & 35 deletions

File tree

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,39 @@ using Firecrawl;
1818

1919
using var api = new FirecrawlApp(apiKey);
2020

21+
// Scrape
2122
var response = await api.Scraping.ScrapeAsync("https://docs.firecrawl.dev/features/scrape");
2223

2324
string markdown = response.Data.Markdown;
25+
26+
// Crawl
27+
var crawlResponse = await api.Crawling.CrawlUrlsAsync(
28+
url: "https://docs.firecrawl.dev/",
29+
crawlerOptions: new CrawlUrlsRequestCrawlerOptions
30+
{
31+
Limit = 3,
32+
},
33+
pageOptions: new CrawlUrlsRequestPageOptions
34+
{
35+
OnlyMainContent = true,
36+
});
37+
38+
var jobResponse = await api.Crawl.WaitJobAsync(
39+
jobId: crawlResponse.JobId);
40+
41+
foreach (var data in jobResponse.Data)
42+
{
43+
Console.WriteLine($"URL: {data.Metadata.SourceURL}");
44+
Console.WriteLine($"Markdown: {data.Markdown}");
45+
}
2446
```
2547

2648
### CLI
2749
```bash
2850
dotnet tool install -g Firecrawl.Cli
2951
firecrawl auth <API_KEY>
3052
firecrawl scrape https://docs.firecrawl.dev/features/scrape # saves it to output.md
53+
firecrawl crawl https://docs.firecrawl.dev/features/scrape --limit 5 # saves all .md files to docs.firecrawl.dev folder
3154
```
3255

3356
## Support

src/libs/Firecrawl.Cli/Commands/CrawlCommand.cs

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System.CommandLine;
2-
using System.Diagnostics.CodeAnalysis;
32

43
namespace Firecrawl.Cli.Commands;
54

@@ -21,7 +20,7 @@ public CrawlCommand() : base(name: "crawl", description: "Crawl a url and saves
2120

2221
var limit = new Option<int>(
2322
name: "limit",
24-
getDefaultValue: () => 10,
23+
getDefaultValue: () => 5,
2524
description: "Limit of pages to crawl");
2625
AddOption(limit);
2726

@@ -67,18 +66,8 @@ private static async Task HandleAsync(
6766

6867
Console.WriteLine($"JobId: {response.JobId}");
6968

70-
GetCrawlStatusResponse? statusResponse = null;
71-
while (true)
72-
{
73-
await Task.Delay(TimeSpan.FromSeconds(5)).ConfigureAwait(false);
74-
75-
statusResponse = await api.Crawl.GetCrawlStatusAsync(
76-
jobId: response.JobId!).ConfigureAwait(false);
77-
if (statusResponse.Status == "completed")
78-
{
79-
break;
80-
}
81-
}
69+
var jobResponse = await api.Crawl.WaitJobAsync(
70+
jobId: response.JobId!).ConfigureAwait(false);
8271

8372
if (string.IsNullOrWhiteSpace(outputPath))
8473
{
@@ -88,7 +77,7 @@ private static async Task HandleAsync(
8877
Directory.CreateDirectory(outputPath);
8978

9079
var index = 0;
91-
foreach (var data in statusResponse.Data ?? [])
80+
foreach (var data in jobResponse.Data ?? [])
9281
{
9382
var name = string.IsNullOrWhiteSpace(data.Metadata?.SourceURL)
9483
? $"output{++index}.md"
@@ -115,7 +104,7 @@ public static string ConvertUrlToFilename(string url)
115104
.Replace("www.", string.Empty, StringComparison.OrdinalIgnoreCase);
116105

117106
// Replace invalid filename characters with '_'
118-
foreach (char c in Path.GetInvalidFileNameChars())
107+
foreach (var c in Path.GetInvalidFileNameChars())
119108
{
120109
url = url.Replace(c, '_');
121110
}

src/libs/Firecrawl.Cli/Firecrawl.Cli.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<ImplicitUsings>enable</ImplicitUsings>
77
<Nullable>enable</Nullable>
88
<GenerateDocumentationFile>false</GenerateDocumentationFile>
9-
<NoWarn>$(NoWarn);CA1724;CA1303</NoWarn>
9+
<NoWarn>$(NoWarn);CA1724;CA1303;CA1054;CA1055</NoWarn>
1010
</PropertyGroup>
1111

1212
<PropertyGroup Label="NuGet">
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
namespace Firecrawl;
2+
3+
public partial class CrawlClient
4+
{
5+
/// <summary>
6+
/// Polls the crawl status once per second until the job reports "completed" or "failed", then returns that status response.
7+
/// </summary>
8+
/// <param name="jobId">Identifier of the crawl job to poll; forwarded unchanged to GetCrawlStatusAsync.</param>
9+
/// <param name="cancellationToken">Token used to cancel the delay and the status request.</param>
10+
/// <exception cref="global::System.InvalidOperationException">Not thrown by this method's own code; presumably propagated from GetCrawlStatusAsync — TODO confirm.</exception>
11+
public async Task<GetCrawlStatusResponse> WaitJobAsync(
12+
string jobId,
13+
CancellationToken cancellationToken = default)
14+
{
15+
while (true)
16+
{
17+
cancellationToken.ThrowIfCancellationRequested(); // surfaces cancellation as OperationCanceledException before each poll
18+
19+
await Task.Delay(TimeSpan.FromSeconds(1), cancellationToken).ConfigureAwait(false); // pause one second before every status request, including the first
20+
21+
var statusResponse = await GetCrawlStatusAsync(
22+
jobId: jobId,
23+
cancellationToken: cancellationToken).ConfigureAwait(false);
24+
if (statusResponse.Status is "completed" or "failed") // stop polling on either status; the caller has to check Status to tell them apart
25+
{
26+
return statusResponse;
27+
}
28+
}
29+
}
30+
}

src/tests/IntegrationTests/Tests.Crawl.cs

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,12 @@ public async Task Crawl()
2626

2727
response.JobId.Should().NotBeNullOrEmpty();
2828

29-
GetCrawlStatusResponse? statusResponse = null;
30-
while (!cancellationToken.IsCancellationRequested)
31-
{
32-
await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken);
33-
34-
statusResponse = await api.Crawl.GetCrawlStatusAsync(
35-
jobId: response.JobId!,
36-
cancellationToken: cancellationToken);
37-
if (statusResponse.Status == "completed")
38-
{
39-
break;
40-
}
41-
}
29+
var jobResponse = await api.Crawl.WaitJobAsync(
30+
jobId: response.JobId!,
31+
cancellationToken: cancellationToken);
4232

4333
var index = 0;
44-
foreach (var data in statusResponse?.Data ?? [])
34+
foreach (var data in jobResponse.Data ?? [])
4535
{
4636
data.Html.Should().NotBeNullOrEmpty();
4737
data.Markdown.Should().NotBeNullOrEmpty();
@@ -51,9 +41,9 @@ public async Task Crawl()
5141
Console.WriteLine($"Output file: {new Uri(fileInfo.FullName).AbsoluteUri}");
5242
}
5343

54-
statusResponse.Should().NotBeNull();
55-
statusResponse!.Status.Should().Be("completed");
56-
statusResponse.Total.Should().Be(3);
57-
statusResponse.Data.Should().NotBeNullOrEmpty();
44+
jobResponse.Should().NotBeNull();
45+
jobResponse.Status.Should().Be("completed");
46+
jobResponse.Total.Should().Be(3);
47+
jobResponse.Data.Should().NotBeNullOrEmpty();
5848
}
5949
}

0 commit comments

Comments
 (0)