diff --git a/library/html-server.js b/library/html-server.js index 5850429..629fb4a 100644 --- a/library/html-server.js +++ b/library/html-server.js @@ -84,6 +84,7 @@ class HtmlServer { .replace(/\[%content%\]/g, content) // Content is assumed to be already-safe HTML .replace(/\[%ver%\]/g, this.escapeHtml(renderOptions.version)) .replace(/\[%download-date%\]/g, this.escapeHtml(renderOptions.downloadDate)) + .replace(/\[%crawler-date%\]/g, this.escapeHtml(renderOptions.crawlerDate || renderOptions.downloadDate || 'Never')) .replace(/\[%total-resources%\]/g, this.escapeHtml(renderOptions.totalResources.toLocaleString())) .replace(/\[%total-packages%\]/g, this.escapeHtml(renderOptions.totalPackages.toLocaleString())) .replace(/\[%endpoint-path%\]/g, this.escapeHtml(renderOptions.endpointpath)) diff --git a/packages/package-crawler.js b/packages/package-crawler.js index d80c429..6e05a7b 100644 --- a/packages/package-crawler.js +++ b/packages/package-crawler.js @@ -27,19 +27,20 @@ class PackageCrawler { this.log = log; const startTime = Date.now(); + const masterSource = this.config.masterFile || this.config.masterUrl; this.crawlerLog = { startTime: new Date().toISOString(), - master: this.config.masterUrl, + master: masterSource, feeds: [], totalBytes: 0, errors: '' }; - this.log.info('Running web crawler for packages using master URL: '+ this.config.masterUrl); + this.log.info('Running web crawler for packages using master source: '+ masterSource); try { - // Fetch the master JSON file - const masterResponse = await this.fetchJson(this.config.masterUrl); + // Fetch the master JSON file using unified fetchJson method + const masterResponse = await this.fetchJson(masterSource); if (!masterResponse.feeds || !Array.isArray(masterResponse.feeds)) { throw new Error('Invalid master JSON: missing feeds array'); @@ -58,7 +59,7 @@ class PackageCrawler { try { await this.updateTheFeed( this.fixUrl(feedConfig.url), - this.config.masterUrl, + masterSource, feedConfig.errors ? feedConfig.errors.replace(/\|/g, '@').replace(/_/g, '.') : '', packageRestrictions ); @@ -93,7 +94,30 @@ class PackageCrawler { return url.replace(/^http:/, 'https:'); } - async fetchJson(url) { + async fetchJson(source) { + // Determine if source is a file path or URL + if (this.isFilePath(source)) { + return await this.fetchJsonFromFile(source); + } else { + return await this.fetchJsonFromUrl(source); + } + } + + isFilePath(source) { + // Check if it's a file path (not a URL) + return !source.startsWith('http://') && !source.startsWith('https://'); + } + + async fetchJsonFromFile(filePath) { + try { + const data = await fs.promises.readFile(filePath, 'utf8'); + return JSON.parse(data); + } catch (error) { + throw new Error(`Failed to read JSON from file ${filePath}: ${error.message}`); + } + } + + async fetchJsonFromUrl(url) { try { const response = await axios.get(url, { timeout: 30000, diff --git a/packages/packages.js b/packages/packages.js index 03c71c2..4c1fd1b 100644 --- a/packages/packages.js +++ b/packages/packages.js @@ -421,8 +421,15 @@ class PackagesModule { // Get counts from database const tableCounts = await this.getDatabaseTableCounts(); + // Format crawler date + let crawlerDate = 'Never'; + if (this.lastRunTime) { + crawlerDate = new Date(this.lastRunTime).toLocaleDateString(); + } + return { downloadDate: downloadDate, + crawlerDate: crawlerDate, totalResources: 0, // Packages don't track individual resources totalPackages: tableCounts.packages || 0, totalVersions: tableCounts.packageVersions || 0, @@ -437,6 +444,7 @@ class PackagesModule { return { downloadDate: 'Error', + crawlerDate: 'Never', totalResources: 0, totalPackages: 0, totalVersions: 0, @@ -555,10 +563,24 @@ class PackagesModule { async initialize(config) { this.config = config; - // Set default masterUrl if not configured - if (!this.config.masterUrl) { + // Validate masterFile/masterUrl configuration + if (this.config.masterFile && this.config.masterUrl) { + throw new Error('Cannot specify both masterFile and masterUrl. Please use only one.'); + } + + // Process masterFile if specified - normalize path early + if (this.config.masterFile) { + // If not absolute path, resolve relative to data directory + if (!path.isAbsolute(this.config.masterFile)) { + this.config.masterFile = folders.filePath(this.config.masterFile); + } + pckLog.info(`Using masterFile: ${this.config.masterFile}`); + } + + // Set default masterUrl if neither masterFile nor masterUrl configured + if (!this.config.masterFile && !this.config.masterUrl) { this.config.masterUrl = 'https://fhir.github.io/ig-registry/package-feeds.json'; - pckLog.info('No masterUrl configured, using default:', this.config.masterUrl); + pckLog.info('No masterFile or masterUrl configured, using default masterUrl:', this.config.masterUrl); } pckLog.info('Initializing Packages module...'); @@ -826,21 +848,22 @@ class PackagesModule { async runWebCrawler() { const startTime = Date.now(); this.totalRuns++; + const masterSource = this.config.masterFile || this.config.masterUrl; this.crawlerLog = { runNumber: this.totalRuns, startTime: new Date().toISOString(), - master: this.config.masterUrl, + master: masterSource, feeds: [], totalBytes: 0, errors: '' }; pckLog.info(`Running web crawler for packages (run #${this.totalRuns})...`); - pckLog.info('Fetching master URL:', this.config.masterUrl); + pckLog.info('Fetching master from:', masterSource); try { - // Fetch the master JSON file - const masterResponse = await this.fetchJson(this.config.masterUrl); + // Fetch the master JSON file using crawler's unified fetchJson method + const masterResponse = await this.crawler.fetchJson(masterSource); if (!masterResponse.feeds || !Array.isArray(masterResponse.feeds)) { throw new Error('Invalid master JSON: missing feeds array'); @@ -859,7 +882,7 @@ class PackagesModule { try { await this.updateTheFeed( this.fixUrl(feedConfig.url), - this.config.masterUrl, + masterSource, feedConfig.errors ? feedConfig.errors.replace(/\|/g, '@').replace(/_/g, '.') : '', packageRestrictions ); @@ -1212,7 +1235,8 @@ class PackagesModule { mirror: this.config.mirrorPath }, config: { - masterUrl: this.config.masterUrl + masterUrl: this.config.masterUrl, + masterFile: this.config.masterFile } }); } @@ -2759,7 +2783,11 @@ class PackagesModule { if (this.lastRunTime) { content += `Last Run${new Date(this.lastRunTime).toLocaleString()}`; } - content += `Master URL${htmlServer.escapeHtml(this.config.masterUrl)}`; + if (this.config.masterFile) { + content += `Master File${htmlServer.escapeHtml(this.config.masterFile)}`; + } else { + content += `Master URL${htmlServer.escapeHtml(this.config.masterUrl)}`; + } content += ''; content += ''; content += ''; diff --git a/server.js b/server.js index b542468..74f4890 100644 --- a/server.js +++ b/server.js @@ -221,28 +221,28 @@ async function buildRootPageContent() { content += '