diff --git a/components/retrack-types/src/trackers.rs b/components/retrack-types/src/trackers.rs index 1ce1794..941fcc6 100644 --- a/components/retrack-types/src/trackers.rs +++ b/components/retrack-types/src/trackers.rs @@ -22,8 +22,8 @@ pub use self::{ tracker_list_revisions_params::TrackerListRevisionsParams, tracker_target::{ ApiTarget, ConfiguratorScriptArgs, ConfiguratorScriptRequest, ConfiguratorScriptResult, - ExtractorEngine, ExtractorScriptArgs, ExtractorScriptResult, PageTarget, TargetRequest, - TargetResponse, TrackerTarget, + ExtractorEngine, ExtractorScriptArgs, ExtractorScriptResult, PageTarget, ProxyConfig, + ProxyCredentials, TargetRequest, TargetResponse, TrackerTarget, }, tracker_update_params::TrackerUpdateParams, trackers_list_params::TrackersListParams, @@ -61,6 +61,7 @@ pub mod tests { engine: None, user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: false, + proxy: None, }), config: TrackerConfig { revisions, diff --git a/components/retrack-types/src/trackers/tracker.rs b/components/retrack-types/src/trackers/tracker.rs index 0c93d38..c243e86 100644 --- a/components/retrack-types/src/trackers/tracker.rs +++ b/components/retrack-types/src/trackers/tracker.rs @@ -59,6 +59,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/2.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })) .build(); assert_json_snapshot!(tracker, @r###" @@ -104,6 +105,7 @@ mod tests { engine: None, user_agent: Some("Retrack/2.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })) .with_schedule("0 0 * * *") .build(); @@ -147,6 +149,7 @@ mod tests { engine: None, user_agent: Some("Retrack/2.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })) .with_schedule("0 0 * * *") .build(); @@ -190,6 +193,7 @@ mod tests { engine: Some(ExtractorEngine::Camoufox), user_agent: Some("Retrack/2.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })) .with_schedule("0 0 * * *") .build(); @@ -237,6 +241,7 @@ mod tests { engine: None, user_agent: Some("Retrack/2.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })) .with_schedule("0 0 * * *") .with_job_config(SchedulerJobConfig { diff --git a/components/retrack-types/src/trackers/tracker_create_params.rs b/components/retrack-types/src/trackers/tracker_create_params.rs index f97b345..e6f71d1 100644 --- a/components/retrack-types/src/trackers/tracker_create_params.rs +++ b/components/retrack-types/src/trackers/tracker_create_params.rs @@ -54,6 +54,7 @@ mod tests { engine: None, user_agent: None, accept_invalid_certificates: false, + proxy: None, }), config: Default::default(), tags: vec![], @@ -85,6 +86,7 @@ mod tests { params: None, user_agent: None, accept_invalid_certificates: false, + proxy: None, }), config: TrackerConfig { revisions: 10, @@ -119,6 +121,7 @@ mod tests { engine: None, user_agent: None, accept_invalid_certificates: false, + proxy: None, }), config: TrackerConfig { revisions: 3, @@ -155,6 +158,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }), config: TrackerConfig { revisions: 3, @@ -225,6 +229,7 @@ mod tests { engine: None, user_agent: None, accept_invalid_certificates: false, + proxy: None, }), config: Default::default(), tags: vec![], @@ -257,6 +262,7 @@ mod tests { engine: None, user_agent: None, accept_invalid_certificates: false, + proxy: None, }), config: TrackerConfig { revisions: 10, @@ -293,6 +299,7 @@ mod tests { engine: None, user_agent: None, accept_invalid_certificates: false, + proxy: None, }), config: TrackerConfig { revisions: 3, @@ -345,6 +352,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }), config: TrackerConfig { revisions: 3, diff --git a/components/retrack-types/src/trackers/tracker_target.rs b/components/retrack-types/src/trackers/tracker_target.rs index 369e4b0..2c99788 100644 --- a/components/retrack-types/src/trackers/tracker_target.rs +++ b/components/retrack-types/src/trackers/tracker_target.rs @@ -1,5 +1,6 @@ mod api_target; mod page_target; +mod proxy_config; pub use self::{ api_target::{ @@ -7,6 +8,7 @@ pub use self::{ ExtractorScriptArgs, ExtractorScriptResult, TargetRequest, TargetResponse, }, page_target::{ExtractorEngine, PageTarget}, + proxy_config::{ProxyConfig, ProxyCredentials}, }; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; @@ -26,7 +28,7 @@ pub enum TrackerTarget { #[cfg(test)] mod tests { use super::TrackerTarget; - use crate::trackers::{ApiTarget, ExtractorEngine, PageTarget, TargetRequest}; + use crate::trackers::{ApiTarget, ExtractorEngine, PageTarget, ProxyConfig, ProxyCredentials, TargetRequest}; use http::{ Method, header::{AUTHORIZATION, CONTENT_TYPE}, @@ -42,6 +44,7 @@ mod tests { engine: None, user_agent: None, accept_invalid_certificates: false, + proxy: None, }); let target_json = json!({ "type": "page", @@ -59,6 +62,7 @@ mod tests { engine: Some(ExtractorEngine::Camoufox), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }); let target_json = json!({ "type": "page", @@ -78,6 +82,7 @@ mod tests { requests: vec![TargetRequest::new("https://retrack.dev/".parse()?)], configurator: None, extractor: None, + proxy: None, }); let target_json = json!({ "type": "api", "requests": [{ "url": "https://retrack.dev/" }] }); assert_eq!(serde_json::to_value(&target)?, target_json); @@ -93,6 +98,7 @@ mod tests { }], configurator: None, extractor: None, + proxy: None, }); let target_json = json!({ "type": "api", "requests": [{ "url": "https://retrack.dev/", "method": "PUT" }] }); assert_eq!(serde_json::to_value(&target)?, target_json); @@ -117,6 +123,7 @@ mod tests { }], configurator: None, extractor: None, + proxy: None, }); let target_json = json!({ "type": "api", @@ -132,6 +139,61 @@ mod tests { target ); + // Test PageTarget with proxy + let target = TrackerTarget::Page(PageTarget { + extractor: "export async function execute(p) { await p.goto('https://retrack.dev/'); return await p.content(); }".to_string(), + params: None, + engine: None, + user_agent: None, + accept_invalid_certificates: false, + proxy: Some(ProxyConfig { + url: "http://proxy.example.com:8080".parse()?, + credentials: Some(ProxyCredentials { + scheme: "Basic".to_string(), + value: "dXNlcjpwYXNz".to_string(), + }), + }), + }); + let target_json = json!({ + "type": "page", + "extractor": "export async function execute(p) { await p.goto('https://retrack.dev/'); return await p.content(); }", + "proxy": { + "url": "http://proxy.example.com:8080/", + "credentials": { + "scheme": "Basic", + "value": "dXNlcjpwYXNz" + } + } + }); + assert_eq!(serde_json::to_value(&target)?, target_json); + assert_eq!( + serde_json::from_value::(target_json)?, + target + ); + + // Test ApiTarget with proxy + let target = TrackerTarget::Api(ApiTarget { + requests: vec![TargetRequest::new("https://retrack.dev/".parse()?)], + configurator: None, + extractor: None, + proxy: Some(ProxyConfig { + url: "http://proxy.example.com:8080".parse()?, + credentials: None, + }), + }); + let target_json = json!({ + "type": "api", + "requests": [{ "url": "https://retrack.dev/" }], + "proxy": { + "url": "http://proxy.example.com:8080/" + } + }); + assert_eq!(serde_json::to_value(&target)?, target_json); + assert_eq!( + serde_json::from_value::(target_json)?, + target + ); + Ok(()) } } diff --git a/components/retrack-types/src/trackers/tracker_target/api_target.rs b/components/retrack-types/src/trackers/tracker_target/api_target.rs index 10382e5..c4d966a 100644 --- a/components/retrack-types/src/trackers/tracker_target/api_target.rs +++ b/components/retrack-types/src/trackers/tracker_target/api_target.rs @@ -1,3 +1,4 @@ +use super::ProxyConfig; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; use utoipa::ToSchema; @@ -31,6 +32,9 @@ pub struct ApiTarget { /// Optional custom script (Deno) to extract only necessary data from the API response. pub extractor: Option, + + /// Optional proxy configuration. + pub proxy: Option, } #[cfg(test)] @@ -50,6 +54,7 @@ mod tests { requests: vec![TargetRequest::new(Url::parse("https://retrack.dev")?)], configurator: None, extractor: None, + proxy: None, }; let target_json = json!({ "requests": [{ "url": "https://retrack.dev/" }] }); assert_eq!(serde_json::to_value(&target)?, target_json); @@ -62,6 +67,7 @@ mod tests { }], configurator: None, extractor: None, + proxy: None, }; let target_json = json!({ "requests": [{"url": "https://retrack.dev/", "method": "PUT" }] }); @@ -84,6 +90,7 @@ mod tests { }], configurator: None, extractor: None, + proxy: None, }; let target_json = json!({ "requests": [{ @@ -115,6 +122,7 @@ mod tests { }], configurator: None, extractor: None, + proxy: None, }; let target_json = json!({ "requests": [{ @@ -152,6 +160,7 @@ mod tests { }], configurator: None, extractor: None, + proxy: None, }; let target_json = json!({ "requests": [{ @@ -195,6 +204,7 @@ mod tests { .to_string(), ), extractor: None, + proxy: None, }; let target_json = json!({ "requests": [{ @@ -241,6 +251,7 @@ mod tests { "((context) => ({ body: Deno.core.encode(JSON.stringify({ key: 'value' })) })();" .to_string(), ), + proxy: None, }; let target_json = json!({ "requests": [{ diff --git a/components/retrack-types/src/trackers/tracker_target/page_target.rs b/components/retrack-types/src/trackers/tracker_target/page_target.rs index a581aae..bb1d61a 100644 --- a/components/retrack-types/src/trackers/tracker_target/page_target.rs +++ b/components/retrack-types/src/trackers/tracker_target/page_target.rs @@ -1,5 +1,6 @@ mod extractor_engine; +use super::ProxyConfig; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; use utoipa::ToSchema; @@ -28,6 +29,9 @@ pub struct PageTarget { /// Whether to ignore invalid server certificates when sending network requests. #[serde(default, skip_serializing_if = "std::ops::Not::not")] pub accept_invalid_certificates: bool, + + /// Optional proxy configuration. + pub proxy: Option, } #[cfg(test)] @@ -48,6 +52,7 @@ mod tests { params: Some(json!({ "param": "value" })), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }; let target_json = json!({ "extractor": "export async function execute(p) { await p.goto('https://retrack.dev/'); return await p.content(); }", diff --git a/components/retrack-types/src/trackers/tracker_target/proxy_config.rs b/components/retrack-types/src/trackers/tracker_target/proxy_config.rs new file mode 100644 index 0000000..565c538 --- /dev/null +++ b/components/retrack-types/src/trackers/tracker_target/proxy_config.rs @@ -0,0 +1,63 @@ +use serde::{Deserialize, Serialize}; +use serde_with::skip_serializing_none; +use url::Url; +use utoipa::ToSchema; + +/// Proxy configuration for tracker targets. +#[skip_serializing_none] +#[derive(Serialize, Deserialize, Debug, Clone, Hash, PartialEq, Eq, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ProxyConfig { + /// Proxy server URL. + pub url: Url, + + /// Optional credentials for proxy authentication. + pub credentials: Option, +} + +/// Proxy authentication credentials. +#[derive(Serialize, Deserialize, Debug, Clone, Hash, PartialEq, Eq, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ProxyCredentials { + /// Authentication scheme (e.g., "Basic", "Bearer"). + pub scheme: String, + + /// Authentication value that will be used for Proxy-Authorization HTTP header. + pub value: String, +} + +#[cfg(test)] +mod tests { + use super::{ProxyConfig, ProxyCredentials}; + use serde_json::json; + + #[test] + fn can_serialize_and_deserialize() -> anyhow::Result<()> { + let proxy = ProxyConfig { + url: "http://proxy.example.com:8080".parse()?, + credentials: None, + }; + let proxy_json = json!({ "url": "http://proxy.example.com:8080/" }); + assert_eq!(serde_json::to_value(&proxy)?, proxy_json); + assert_eq!(serde_json::from_value::(proxy_json)?, proxy); + + let proxy = ProxyConfig { + url: "http://proxy.example.com:8080".parse()?, + credentials: Some(ProxyCredentials { + scheme: "Basic".to_string(), + value: "dXNlcjpwYXNz".to_string(), + }), + }; + let proxy_json = json!({ + "url": "http://proxy.example.com:8080/", + "credentials": { + "scheme": "Basic", + "value": "dXNlcjpwYXNz" + } + }); + assert_eq!(serde_json::to_value(&proxy)?, proxy_json); + assert_eq!(serde_json::from_value::(proxy_json)?, proxy); + + Ok(()) + } +} diff --git a/components/retrack-types/src/trackers/tracker_update_params.rs b/components/retrack-types/src/trackers/tracker_update_params.rs index 372c83a..3bbe7fa 100644 --- a/components/retrack-types/src/trackers/tracker_update_params.rs +++ b/components/retrack-types/src/trackers/tracker_update_params.rs @@ -79,6 +79,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: None, tags: None, @@ -129,6 +130,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: Some(TrackerConfig { revisions: 3, @@ -186,6 +188,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: Some(TrackerConfig { revisions: 3, @@ -305,6 +308,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: None, tags: None, @@ -377,6 +381,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: Some(TrackerConfig { revisions: 3, @@ -438,6 +443,7 @@ mod tests { engine: Some(ExtractorEngine::Chromium), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: Some(TrackerConfig { revisions: 3, diff --git a/components/retrack-web-scraper/src/api/web_page/constants.ts b/components/retrack-web-scraper/src/api/web_page/constants.ts index 2ce2718..359a81e 100644 --- a/components/retrack-web-scraper/src/api/web_page/constants.ts +++ b/components/retrack-web-scraper/src/api/web_page/constants.ts @@ -31,6 +31,17 @@ export interface WorkerResultMessage { content: unknown; } +/** + * Represents proxy configuration. + */ +export interface ProxyConfig { + url: string; + credentials?: { + scheme: string; + value: string; + }; +} + /** * Represents the data passed to the worker thread. */ @@ -53,4 +64,6 @@ export interface WorkerData { acceptInvalidCertificates?: boolean; // Path to a folder where to save screenshots. screenshotsPath?: string; + // Optional proxy configuration. + proxy?: ProxyConfig; } diff --git a/components/retrack-web-scraper/src/api/web_page/execute.ts b/components/retrack-web-scraper/src/api/web_page/execute.ts index a9fd60e..e433b6c 100644 --- a/components/retrack-web-scraper/src/api/web_page/execute.ts +++ b/components/retrack-web-scraper/src/api/web_page/execute.ts @@ -130,6 +130,7 @@ export function registerExecuteRoutes({ config, server, getLocalBrowserServer }: userAgent: request.body.userAgent, acceptInvalidCertificates: request.body.acceptInvalidCertificates, screenshotsPath: config.browser.screenshotsPath, + proxy: request.body.proxy, }; try { diff --git a/components/retrack-web-scraper/src/api/web_page/worker.ts b/components/retrack-web-scraper/src/api/web_page/worker.ts index b5141af..090e367 100644 --- a/components/retrack-web-scraper/src/api/web_page/worker.ts +++ b/components/retrack-web-scraper/src/api/web_page/worker.ts @@ -26,6 +26,7 @@ const { userAgent, acceptInvalidCertificates, screenshotsPath, + proxy, } = workerData as WorkerData; // SECURITY: Basic prototype pollution protection against the most common vectors until we can use Playwright with @@ -92,7 +93,30 @@ try { throw new Error('Failed to connect to a browser.'); } -const context = await browser.newContext({ ignoreHTTPSErrors: acceptInvalidCertificates, userAgent, viewport: null }); +const contextOptions: { + ignoreHTTPSErrors: boolean; + userAgent?: string; + viewport: null; + proxy?: { server: string; username?: string; password?: string }; +} = { ignoreHTTPSErrors: acceptInvalidCertificates ?? false, userAgent, viewport: null }; + +// Configure proxy if provided +if (proxy) { + contextOptions.proxy = { server: proxy.url }; + // Note: Playwright's proxy authentication only supports username/password format + // For custom auth schemes (like Bearer), the credentials would need to be handled + // differently, potentially via extraHTTPHeaders. For now, we document this limitation. + if (proxy.credentials) { + // If using Basic auth, extract username and password + // This is a simplified implementation - full Basic auth would require base64 decoding + // For now, we'll just pass the server URL and note that custom auth isn't fully supported + log.warn( + `Proxy authentication with custom scheme '${proxy.credentials.scheme}' is configured, but Playwright only supports username/password format. Custom auth schemes may not work correctly.`, + ); + } +} + +const context = await browser.newContext(contextOptions); // SECURITY: Ideally, the extractor script shouldn't have access to the browser instance, as it could close the browser // and access other contexts. Unfortunately, the browser instance and context are accessible through various Playwright diff --git a/src/scheduler/scheduler_jobs/trackers_run_job.rs b/src/scheduler/scheduler_jobs/trackers_run_job.rs index c93711b..efae035 100644 --- a/src/scheduler/scheduler_jobs/trackers_run_job.rs +++ b/src/scheduler/scheduler_jobs/trackers_run_job.rs @@ -1078,6 +1078,7 @@ mod tests { requests: vec![TargetRequest::new(server.url("/api-normal-job").parse()?)], configurator: None, extractor: None, + proxy: None, })) .build(), ) @@ -1177,6 +1178,7 @@ mod tests { requests: vec![TargetRequest::new(server.url("/api-retry-job").parse()?)], configurator: None, extractor: None, + proxy: None, })) .build(); create_params.config.job = Some(SchedulerJobConfig { @@ -1290,6 +1292,7 @@ mod tests { requests: vec![TargetRequest::new(server.url("/api-failed-job").parse()?)], configurator: None, extractor: None, + proxy: None, })) .with_tags(vec!["tag1".to_string(), "tag2".to_string()]) .build(), @@ -1433,6 +1436,7 @@ mod tests { requests: vec![TargetRequest::new(server.url("/api-failed-retry").parse()?)], configurator: None, extractor: None, + proxy: None, })) .build(); create_params.config.job = Some(SchedulerJobConfig { diff --git a/src/server/handlers/trackers_create_revision.rs b/src/server/handlers/trackers_create_revision.rs index c1c3ad7..3dc6311 100644 --- a/src/server/handlers/trackers_create_revision.rs +++ b/src/server/handlers/trackers_create_revision.rs @@ -287,9 +287,11 @@ mod tests { body: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: Some(format!("{}/configurator.js", server.base_url())), extractor: None, + proxy: None, })) .build(), ) @@ -355,6 +357,7 @@ mod tests { body: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: Some(format!("{}/extractor.js", server.base_url())), diff --git a/src/trackers.rs b/src/trackers.rs index d3681e8..8004fbd 100644 --- a/src/trackers.rs +++ b/src/trackers.rs @@ -40,6 +40,7 @@ pub mod tests { engine: None, user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }), config: Default::default(), tags: vec!["tag".to_string()], @@ -124,6 +125,7 @@ pub mod tests { timeout: tracker.config.timeout, // Non-tracker properties. previous_content: None, + proxy: target.proxy.as_ref(), }) } } @@ -150,6 +152,7 @@ pub mod tests { engine: None, user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: false, + proxy: None, }), config: TrackerConfig { revisions, diff --git a/src/trackers/api_ext.rs b/src/trackers/api_ext.rs index 58eeb9b..33ead7e 100644 --- a/src/trackers/api_ext.rs +++ b/src/trackers/api_ext.rs @@ -903,10 +903,11 @@ impl<'a, DR: DnsResolver> TrackersApiExt<'a, DR> { accept_invalid_certificates: target.accept_invalid_certificates, timeout: tracker.config.timeout, previous_content: previous_revision.as_ref().map(|rev| &rev.data), + proxy: target.proxy.as_ref(), }; let scraper_response = self - .http_client(false) + .http_client(false, None)? .post(format!( "{}api/web_page/execute", self.api.config.as_ref().components.web_scraper_url.as_str() @@ -1000,7 +1001,7 @@ impl<'a, DR: DnsResolver> TrackersApiExt<'a, DR> { let requests = requests_override.as_ref().unwrap_or(&target.requests); let mut responses = Vec::with_capacity(requests.len()); for (request_index, request) in requests.iter().enumerate() { - let client = self.http_client(request.accept_invalid_certificates); + let client = self.http_client(request.accept_invalid_certificates, target.proxy.as_ref())?; let request_builder = client.request( request.method.as_ref().unwrap_or(&Method::GET).clone(), request.url.clone(), @@ -1232,7 +1233,7 @@ impl<'a, DR: DnsResolver> TrackersApiExt<'a, DR> { } Ok(self - .http_client(false) + .http_client(false, None)? .get(url) .send() .await? @@ -1242,15 +1243,41 @@ impl<'a, DR: DnsResolver> TrackersApiExt<'a, DR> { } /// Constructs a new instance of the HTTP client with tracing and caching middleware. - fn http_client(&self, accept_invalid_certificates: bool) -> ClientWithMiddleware { + fn http_client( + &self, + accept_invalid_certificates: bool, + proxy: Option<&retrack_types::trackers::ProxyConfig>, + ) -> anyhow::Result { + let mut reqwest_builder = reqwest::Client::builder() + .danger_accept_invalid_certs(accept_invalid_certificates); + + // Configure proxy if provided + if let Some(proxy_config) = proxy { + let reqwest_proxy = reqwest::Proxy::all(proxy_config.url.clone()) + .with_context(|| format!("Failed to configure proxy with URL: {}", proxy_config.url))?; + + // Add proxy authentication if credentials are provided + let reqwest_proxy = if let Some(ref creds) = proxy_config.credentials { + // Build the Proxy-Authorization header value + let auth_value = format!("{} {}", creds.scheme, creds.value); + reqwest_proxy.custom_http_auth( + auth_value.parse().with_context(|| + format!("Failed to parse proxy auth header with scheme: {}", creds.scheme) + )? + ) + } else { + reqwest_proxy + }; + + reqwest_builder = reqwest_builder.proxy(reqwest_proxy); + } + let client_builder = ClientBuilder::new( - reqwest::Client::builder() - .danger_accept_invalid_certs(accept_invalid_certificates) - .build() - .expect("Failed to build http client"), + reqwest_builder.build().context("Failed to build HTTP client")? ) .with(TracingMiddleware::::new()); - if let Some(ref path) = self.api.config.cache.http_cache_path { + + Ok(if let Some(ref path) = self.api.config.cache.http_cache_path { client_builder .with(Cache(HttpCache { mode: CacheMode::Default, @@ -1260,7 +1287,7 @@ impl<'a, DR: DnsResolver> TrackersApiExt<'a, DR> { .build() } else { client_builder.build() - } + }) } } @@ -1358,6 +1385,7 @@ mod tests { media_type: Some("application/json".parse()?), accept_statuses: Some([StatusCode::OK].into_iter().collect()), accept_invalid_certificates: true, + proxy: None, }], configurator: Some("(async () => ({ body: Deno.core.encode(JSON.stringify({ key: 'value' })) })();".to_string()), extractor: Some("((context) => ({ body: Deno.core.encode(JSON.stringify({ key: 'value' })) })();".to_string()), @@ -1389,6 +1417,7 @@ mod tests { engine: None, user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }); let config = TrackerConfig { revisions: 3, @@ -1947,6 +1976,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }, 11).collect::>(), configurator: None, extractor: None, @@ -1972,6 +2002,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: None @@ -2011,6 +2042,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: None @@ -2036,6 +2068,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: Some("".to_string()), extractor: None @@ -2061,6 +2094,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: Some( "a".repeat(global_config.trackers.max_script_size.as_u64() as usize + 1) @@ -2088,6 +2122,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: Some("".to_string()) @@ -2113,6 +2148,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: Some( @@ -2144,6 +2180,7 @@ mod tests { engine: None, user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }), config: TrackerConfig { revisions: 3, @@ -2683,6 +2720,7 @@ mod tests { engine: None, user_agent: Some("".to_string()), accept_invalid_certificates: false, + proxy: None, })), ..Default::default() }).await), @@ -2698,6 +2736,7 @@ mod tests { engine: None, user_agent: Some("a".repeat(201)), accept_invalid_certificates: false, + proxy: None, })), ..Default::default() }).await), @@ -2853,6 +2892,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }, 11).collect::>(), configurator: None, extractor: None @@ -2874,6 +2914,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: None @@ -2895,6 +2936,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: Some("".to_string()), extractor: None @@ -2916,6 +2958,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: Some( "a".repeat(global_config.trackers.max_script_size.as_u64() as usize + 1) @@ -2939,6 +2982,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: Some("".to_string()) @@ -2960,6 +3004,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: Some( @@ -2998,6 +3043,7 @@ mod tests { media_type: None, accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: None @@ -3696,6 +3742,7 @@ mod tests { media_type: Some("application/json".parse()?), accept_statuses: Some([StatusCode::OK].into_iter().collect()), accept_invalid_certificates: true, + proxy: None, }], configurator: None, extractor: None, @@ -4196,6 +4243,7 @@ mod tests { ), accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: None, @@ -4311,6 +4359,7 @@ mod tests { media_type: Some("text/csv".parse()?), accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: None, extractor: None, @@ -4397,6 +4446,7 @@ mod tests { media_type: Some("text/csv".parse()?), accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }, TargetRequest { url: server.url("/api/json-call").parse()?, @@ -4409,6 +4459,7 @@ mod tests { media_type: Some("application/json".parse()?), accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }, ], configurator: None, @@ -4503,6 +4554,7 @@ mod tests { media_type: Some("application/json".parse()?), accept_statuses: None, accept_invalid_certificates: false, + proxy: None, }], configurator: Some(server.url("/configurator.js")), extractor: Some(server.url("/extractor.js")), @@ -6067,6 +6119,7 @@ mod tests { engine: None, user_agent: Some("Unknown/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: Some(TrackerConfig { revisions: 4, @@ -6153,6 +6206,7 @@ mod tests { engine: None, user_agent: Some("Unknown/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: Some(TrackerConfig { revisions: 4, @@ -6234,6 +6288,7 @@ mod tests { engine: None, user_agent: Some("Unknown/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, })), config: Some(TrackerConfig { revisions: 4, diff --git a/src/trackers/database_ext/raw_tracker.rs b/src/trackers/database_ext/raw_tracker.rs index 2d9bb8e..cc2c208 100644 --- a/src/trackers/database_ext/raw_tracker.rs +++ b/src/trackers/database_ext/raw_tracker.rs @@ -71,6 +71,7 @@ struct RawPageTarget<'s> { extractor_engine: Option, user_agent: Option>, accept_invalid_certificates: Option, + proxy: Option>, } #[derive(Serialize, Deserialize, Clone, Debug, Eq, PartialEq)] @@ -79,12 +80,25 @@ enum RawExtractorEngine { Camoufox, } +#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)] +struct RawProxyConfig<'s> { + url: Cow<'s, str>, + credentials: Option>, +} + +#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)] +struct RawProxyCredentials<'s> { + scheme: Cow<'s, str>, + value: Cow<'s, str>, +} + #[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)] struct RawApiTarget<'s> { #[serde(borrow)] requests: Vec>, configurator: Option>, extractor: Option>, + proxy: Option>, } #[serde_as] @@ -233,9 +247,22 @@ fn parse_raw_page_target(raw: RawPageTarget) -> anyhow::Result { }), user_agent: raw.user_agent.map(Cow::into_owned), accept_invalid_certificates: raw.accept_invalid_certificates.unwrap_or_default(), + proxy: raw.proxy.map(parse_raw_proxy_config).transpose()?, })) } +fn parse_raw_proxy_config(raw: RawProxyConfig) -> anyhow::Result { + use retrack_types::trackers::{ProxyConfig, ProxyCredentials}; + + Ok(ProxyConfig { + url: raw.url.into_owned().parse()?, + credentials: raw.credentials.map(|creds| ProxyCredentials { + scheme: creds.scheme.into_owned(), + value: creds.value.into_owned(), + }), + }) +} + fn parse_raw_api_target(raw: RawApiTarget) -> anyhow::Result { Ok(TrackerTarget::Api(ApiTarget { requests: raw @@ -269,6 +296,7 @@ fn parse_raw_api_target(raw: RawApiTarget) -> anyhow::Result { .collect::>>()?, configurator: raw.configurator.map(Cow::into_owned), extractor: raw.extractor.map(Cow::into_owned), + proxy: raw.proxy.map(parse_raw_proxy_config).transpose()?, })) } @@ -344,6 +372,13 @@ impl TryFrom<&Tracker> for RawTracker { } else { None }, + proxy: target.proxy.as_ref().map(|proxy| RawProxyConfig { + url: Cow::Borrowed(proxy.url.as_str()), + credentials: proxy.credentials.as_ref().map(|creds| RawProxyCredentials { + scheme: Cow::Borrowed(&creds.scheme), + value: Cow::Borrowed(&creds.value), + }), + }), }), TrackerTarget::Api(target) => RawTrackerTarget::Api(RawApiTarget { requests: target @@ -392,6 +427,13 @@ impl TryFrom<&Tracker> for RawTracker { .extractor .as_ref() .map(|extractor| Cow::Borrowed(extractor.as_ref())), + proxy: target.proxy.as_ref().map(|proxy| RawProxyConfig { + url: Cow::Borrowed(proxy.url.as_str()), + credentials: proxy.credentials.as_ref().map(|creds| RawProxyCredentials { + scheme: Cow::Borrowed(&creds.scheme), + value: Cow::Borrowed(&creds.value), + }), + }), }), }, actions: item.actions.iter().map(|action| action.into()).collect(), @@ -534,6 +576,7 @@ mod v1 { .collect::>>()?, configurator: raw.configurator.map(Cow::into_owned), extractor: raw.extractor.map(Cow::into_owned), + proxy: None, // No proxy in v1 })) } @@ -606,6 +649,7 @@ mod tests { engine: None, user_agent: None, accept_invalid_certificates: false, + proxy: None, }), config: TrackerConfig { revisions: 1, @@ -628,6 +672,7 @@ mod tests { engine: Some(ExtractorEngine::Camoufox), user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }), config: TrackerConfig { revisions: 1, @@ -674,6 +719,7 @@ mod tests { requests: vec![TargetRequest::new("https://retrack.dev/".parse()?)], configurator: None, extractor: None, + proxy: None, }), config: TrackerConfig::default(), actions: vec![TrackerAction::ServerLog(Default::default())], diff --git a/src/trackers/web_scraper/web_scraper_content_request.rs b/src/trackers/web_scraper/web_scraper_content_request.rs index f2c1315..5d8b648 100644 --- a/src/trackers/web_scraper/web_scraper_content_request.rs +++ b/src/trackers/web_scraper/web_scraper_content_request.rs @@ -1,4 +1,4 @@ -use retrack_types::trackers::TrackerDataValue; +use retrack_types::trackers::{ProxyConfig, TrackerDataValue}; use serde::Serialize; use serde_json::Value as JsonValue; use serde_with::{DurationMilliSeconds, serde_as, skip_serializing_none}; @@ -35,6 +35,9 @@ pub struct WebScraperContentRequest<'a> { /// Optional content of the web page that has been extracted previously. pub previous_content: Option<&'a TrackerDataValue>, + + /// Optional proxy configuration. + pub proxy: Option<&'a ProxyConfig>, } /// Represents engines supported by the Web Scraper component. @@ -65,7 +68,8 @@ mod tests { timeout: Some(Duration::from_millis(100)), previous_content: Some(&TrackerDataValue::new(json!("some content"))), user_agent: Some("Retrack/1.0.0"), - accept_invalid_certificates: true + accept_invalid_certificates: true, + proxy: None, }, @r###" { "extractor": "export async function execute(p) { await p.goto('http://localhost:1234/my/app?q=2'); return await p.content(); }", @@ -97,6 +101,7 @@ mod tests { engine: None, user_agent: Some("Retrack/1.0.0".to_string()), accept_invalid_certificates: true, + proxy: None, }; let tracker = MockTrackerBuilder::create( uuid!("00000000-0000-0000-0000-000000000001"), @@ -142,6 +147,7 @@ mod tests { engine: Some(engine), user_agent: None, accept_invalid_certificates: false, + proxy: None, })) .build();