@@ -6,7 +6,15 @@ import { createGunzip } from "node:zlib";
 import tar from "tar-stream";
 import { parseGitHubUrl, downloadTarball } from "./github.js";
 import { saveConfig } from "./config.js";
-import type { RefdocsConfig, Source } from "./types.js";
+import {
+  isGitHubUrl,
+  isTextFileUrl,
+  deriveLocalPath,
+  deriveCrawlDir,
+  fetchSingleFile,
+  crawlSite,
+} from "./crawl.js";
+import type { RefdocsConfig, Source, GitHubSource, FileSource, CrawlSource } from "./types.js";
 
 export interface AddOptions {
   path?: string;
@@ -25,7 +33,13 @@ export interface UpdateResult {
   filesWritten: number;
 }
 
-export async function addFromUrl(
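+/** Options for addFromCrawl; maxPages and depth are passed through to crawlSite. */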
+export interface CrawlAddOptions {
+  path?: string;
+  maxPages?: number;
+  depth?: number;
+}
+
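+/**
+ * Downloads a GitHub repo tarball and extracts its markdown files into
+ * configDir, recording a typed GitHubSource entry in the config.
+ */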
+export async function addFromGitHub(
   url: string,
   options: AddOptions,
   configDir: string,
@@ -47,7 +61,8 @@ export async function addFromUrl(
     join(configDir, localPath),
   );
 
-  const source: Source = {
+  const source: GitHubSource = {
+    type: "github",
     url,
     owner: parsed.owner,
     repo: parsed.repo,
@@ -68,6 +83,74 @@ export async function addFromUrl(
   return { filesWritten, localPath, source };
 }
 
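+/**
+ * Fetches a single text file from a direct URL and writes it to localPath.
+ *
+ * Illustrative call (the URL is a placeholder, not from this codebase):
+ *   await addFromFileUrl("https://example.com/reference.md", {}, configDir, config);
+ */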
+export async function addFromFileUrl(
+  url: string,
+  options: { path?: string },
+  configDir: string,
+  config: RefdocsConfig,
+): Promise<AddResult> {
+  const localPath = options.path ?? deriveLocalPath(url);
+  const fullPath = join(configDir, localPath);
+
+  const { content } = await fetchSingleFile(url);
+
+  mkdirSync(dirname(fullPath), { recursive: true });
+  writeFileSync(fullPath, content, "utf-8");
+
+  const source: FileSource = {
+    type: "file",
+    url,
+    localPath,
+    addedAt: new Date().toISOString(),
+  };
+
+  // Add the parent directory to paths (not the file itself)
+  const pathDir = dirname(localPath);
+  const paths = isPathCovered(config.paths, pathDir)
+    ? config.paths
+    : [...config.paths, pathDir];
+
+  const sources = upsertSource(config.sources ?? [], source);
+
+  saveConfig({ paths, sources }, configDir);
+
+  return { filesWritten: 1, localPath, source };
+}
+
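+/**
+ * Crawls a documentation site into a local directory and records a CrawlSource.
+ *
+ * Illustrative call (URL and option values are placeholders):
+ *   await addFromCrawl("https://example.com/docs", { maxPages: 50, depth: 2 }, configDir, config);
+ */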
+export async function addFromCrawl(
+  url: string,
+  options: CrawlAddOptions,
+  configDir: string,
+  config: RefdocsConfig,
+): Promise<AddResult> {
+  const localPath = options.path ?? deriveCrawlDir(url);
+  const outputDir = join(configDir, localPath);
+
+  const result = await crawlSite(url, outputDir, {
+    maxPages: options.maxPages,
+    depth: options.depth,
+  });
+
+  const source: CrawlSource = {
+    type: "crawl",
+    url,
+    scope: url,
+    localPath,
+    pagesCrawled: result.filesWritten,
+    addedAt: new Date().toISOString(),
+  };
+
+  const paths = isPathCovered(config.paths, localPath)
+    ? config.paths
+    : [...config.paths, localPath];
+
+  const sources = upsertSource(config.sources ?? [], source);
+
+  saveConfig({ paths, sources }, configDir);
+
+  return { filesWritten: result.filesWritten, localPath, source };
+}
+
 export async function updateSources(
   config: RefdocsConfig,
   configDir: string,
@@ -80,14 +163,48 @@ export async function updateSources(
 
   const results: UpdateResult[] = [];
   for (const source of sources) {
-    const ref = source.branch === "HEAD" ? undefined : source.branch;
-    const tarball = await downloadTarball(source.owner, source.repo, ref, token);
-    const filesWritten = await extractMarkdownFiles(
-      Buffer.from(tarball),
-      source.subpath,
-      join(configDir, source.localPath),
-    );
-    results.push({ source, filesWritten });
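+    // Re-fetch each source from its origin, dispatching on the source type.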
+    switch (source.type) {
+      case "github": {
+        const ref = source.branch === "HEAD" ? undefined : source.branch;
+        const tarball = await downloadTarball(source.owner, source.repo, ref, token);
+        const filesWritten = await extractMarkdownFiles(
+          Buffer.from(tarball),
+          source.subpath,
+          join(configDir, source.localPath),
+        );
+        results.push({ source, filesWritten });
+        break;
+      }
+      case "file": {
+        const { content } = await fetchSingleFile(source.url);
+        const fullPath = join(configDir, source.localPath);
+        mkdirSync(dirname(fullPath), { recursive: true });
+        writeFileSync(fullPath, content, "utf-8");
+        results.push({ source, filesWritten: 1 });
+        break;
+      }
+      case "crawl": {
+        const outputDir = join(configDir, source.localPath);
+        const crawlResult = await crawlSite(source.url, outputDir);
+        results.push({ source, filesWritten: crawlResult.filesWritten });
+        break;
+      }
+      default: {
+        // Handle legacy sources without a type field
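+        // (older configs predate the type field and only described GitHub repos)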
+        const legacy = source as Record<string, unknown>;
+        if (typeof legacy.owner === "string" && typeof legacy.repo === "string") {
+          const ref = (legacy.branch as string) === "HEAD" ? undefined : (legacy.branch as string);
+          const tarball = await downloadTarball(legacy.owner as string, legacy.repo as string, ref, token);
+          const filesWritten = await extractMarkdownFiles(
+            Buffer.from(tarball),
+            (legacy.subpath as string) ?? "",
+            join(configDir, (legacy.localPath as string) ?? ""),
+          );
+          results.push({ source, filesWritten });
+        }
+        break;
+      }
+    }
   }
 
   return results;
@@ -227,9 +344,18 @@ export function isPathCovered(existingPaths: string[], newPath: string): boolean
 }
 
 function upsertSource(sources: Source[], newSource: Source): Source[] {
-  const key = `${newSource.owner}/${newSource.repo}/${newSource.subpath}`;
-  const filtered = sources.filter(
-    (s) => `${s.owner}/${s.repo}/${s.subpath}` !== key,
-  );
+  const key = sourceKey(newSource);
+  const filtered = sources.filter((s) => sourceKey(s) !== key);
   return [...filtered, newSource];
 }
+
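+/** Builds a stable identity key per source so re-adding the same source replaces its entry. */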
+function sourceKey(source: Source): string {
+  switch (source.type) {
+    case "github":
+      return `github:${source.owner}/${source.repo}/${source.subpath}`;
+    case "file":
+      return `file:${source.url}`;
+    case "crawl":
+      return `crawl:${source.url}`;
+  }
+}