polars_io/cloud/
options.rs

1#[cfg(feature = "aws")]
2use std::io::Read;
3#[cfg(feature = "aws")]
4use std::path::Path;
5use std::str::FromStr;
6use std::sync::LazyLock;
7
8#[cfg(any(feature = "aws", feature = "gcp", feature = "azure", feature = "http"))]
9use object_store::ClientOptions;
10#[cfg(feature = "aws")]
11use object_store::aws::AmazonS3Builder;
12#[cfg(feature = "aws")]
13pub use object_store::aws::AmazonS3ConfigKey;
14#[cfg(feature = "azure")]
15pub use object_store::azure::AzureConfigKey;
16#[cfg(feature = "azure")]
17use object_store::azure::MicrosoftAzureBuilder;
18#[cfg(feature = "gcp")]
19use object_store::gcp::GoogleCloudStorageBuilder;
20#[cfg(feature = "gcp")]
21pub use object_store::gcp::GoogleConfigKey;
22#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))]
23use object_store::{BackoffConfig, RetryConfig};
24use polars_error::*;
25#[cfg(feature = "aws")]
26use polars_utils::cache::LruCache;
27#[cfg(feature = "http")]
28use reqwest::header::HeaderMap;
29#[cfg(feature = "serde")]
30use serde::{Deserialize, Serialize};
31#[cfg(feature = "cloud")]
32use url::Url;
33
34#[cfg(feature = "cloud")]
35use super::credential_provider::PlCredentialProvider;
36#[cfg(feature = "file_cache")]
37use crate::file_cache::get_env_file_cache_ttl;
38#[cfg(feature = "aws")]
39use crate::pl_async::with_concurrency_budget;
40
/// Process-wide cache mapping an S3 bucket name to the region it was found in.
///
/// `CloudOptions::build_aws` consults this cache when no region is configured and
/// stores the region it discovers from the bucket's `x-amz-bucket-region` header.
/// The cache is an LRU created with a capacity of 32 and guarded by a
/// `std::sync::Mutex`.
#[cfg(feature = "aws")]
static BUCKET_REGION: LazyLock<
    std::sync::Mutex<LruCache<polars_utils::pl_str::PlSmallStr, polars_utils::pl_str::PlSmallStr>>,
> = LazyLock::new(|| std::sync::Mutex::new(LruCache::with_capacity(32)));
45
/// The type of the config keys must satisfy the following requirements:
/// 1. must be easily collected into a HashMap, the type required by the `object_store` API.
/// 2. be Serializable, required when the serde-lazy feature is defined.
/// 3. not actually use HashMap since that type is disallowed in Polars for performance reasons.
///
/// Currently this type is a vector of `(config key, config value)` pairs.
#[allow(dead_code)]
type Configs<T> = Vec<(T, String)>;
54
/// Provider-specific configuration stored inside [`CloudOptions`].
///
/// Each variant only exists when the matching cargo feature is enabled. The
/// `build_*` methods of [`CloudOptions`] panic with "impl error: cloud type mismatch"
/// when they find a variant belonging to a different provider.
#[derive(Clone, Debug, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub(crate) enum CloudConfig {
    /// Typed key/value pairs applied to an `AmazonS3Builder`.
    #[cfg(feature = "aws")]
    Aws(
        #[cfg_attr(feature = "dsl-schema", schemars(with = "Vec<(String, String)>"))]
        Configs<AmazonS3ConfigKey>,
    ),
    /// Typed key/value pairs applied to a `MicrosoftAzureBuilder`.
    #[cfg(feature = "azure")]
    Azure(
        #[cfg_attr(feature = "dsl-schema", schemars(with = "Vec<(String, String)>"))]
        Configs<AzureConfigKey>,
    ),
    /// Typed key/value pairs applied to a `GoogleCloudStorageBuilder`.
    #[cfg(feature = "gcp")]
    Gcp(
        #[cfg_attr(feature = "dsl-schema", schemars(with = "Vec<(String, String)>"))]
        Configs<GoogleConfigKey>,
    ),
    /// Default HTTP headers as `(name, value)` pairs; also used to carry the
    /// Hugging Face `Authorization` header.
    #[cfg(feature = "http")]
    Http { headers: Vec<(String, String)> },
}
77
#[derive(Clone, Debug, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
/// Options to connect to various cloud providers.
///
/// Instances are normally created through [`Default`] or
/// `CloudOptions::from_untyped_config` and refined with the `with_*` builder
/// methods.
pub struct CloudOptions {
    /// Maximum number of retries; forwarded to the `object_store` retry
    /// configuration when building the AWS, Azure and GCP stores. Defaults to 2.
    pub max_retries: usize,
    /// File-cache time-to-live; the default comes from `get_env_file_cache_ttl()`.
    // NOTE(review): the unit is not visible in this file (presumably seconds) - confirm.
    #[cfg(feature = "file_cache")]
    pub file_cache_ttl: u64,
    /// Provider-specific configuration, or `None` when nothing was set explicitly.
    pub(crate) config: Option<CloudConfig>,
    #[cfg(feature = "cloud")]
    /// Note: In most cases you will want to access this via [`CloudOptions::initialized_credential_provider`]
    /// rather than directly.
    pub(crate) credential_provider: Option<PlCredentialProvider>,
}
92
93impl Default for CloudOptions {
94    fn default() -> Self {
95        Self::default_static_ref().clone()
96    }
97}
98
99impl CloudOptions {
100    pub fn default_static_ref() -> &'static Self {
101        static DEFAULT: LazyLock<CloudOptions> = LazyLock::new(|| CloudOptions {
102            max_retries: 2,
103            #[cfg(feature = "file_cache")]
104            file_cache_ttl: get_env_file_cache_ttl(),
105            config: None,
106            #[cfg(feature = "cloud")]
107            credential_provider: None,
108        });
109
110        &DEFAULT
111    }
112}
113
/// Converts a slice of `(name, value)` string pairs into a `reqwest` [`HeaderMap`].
///
/// A later pair with the same header name replaces the earlier one.
///
/// # Errors
/// Returns a compute error when a name or a value is not a valid HTTP header
/// name / value. For each pair the name is validated before the value.
#[cfg(feature = "http")]
pub(crate) fn try_build_http_header_map_from_items_slice<S: AsRef<str>>(
    headers: &[(S, S)],
) -> PolarsResult<HeaderMap> {
    use reqwest::header::{HeaderName, HeaderValue};

    let mut header_map = HeaderMap::with_capacity(headers.len());

    for (raw_name, raw_value) in headers.iter() {
        let name = HeaderName::from_str(raw_name.as_ref()).map_err(to_compute_err)?;
        let value = HeaderValue::from_str(raw_value.as_ref()).map_err(to_compute_err)?;
        header_map.insert(name, value);
    }

    Ok(header_map)
}
131
132#[allow(dead_code)]
133/// Parse an untype configuration hashmap to a typed configuration for the given configuration key type.
134fn parse_untyped_config<T, I: IntoIterator<Item = (impl AsRef<str>, impl Into<String>)>>(
135    config: I,
136) -> PolarsResult<Configs<T>>
137where
138    T: FromStr + Eq + std::hash::Hash,
139{
140    Ok(config
141        .into_iter()
142        // Silently ignores custom upstream storage_options
143        .filter_map(|(key, val)| {
144            T::from_str(key.as_ref().to_ascii_lowercase().as_str())
145                .ok()
146                .map(|typed_key| (typed_key, val.into()))
147        })
148        .collect::<Configs<T>>())
149}
150
/// The kind of storage backend a URL points at, as derived from its scheme by
/// `CloudType::from_url`.
#[derive(Debug, Clone, PartialEq)]
pub enum CloudType {
    /// `s3://` and `s3a://` URLs.
    Aws,
    /// `az://`, `azure://`, `adl://`, `abfs://` and `abfss://` URLs.
    Azure,
    /// `file://` URLs (plain paths are turned into these by `parse_url`).
    File,
    /// `gs://`, `gcp://` and `gcs://` URLs.
    Gcp,
    /// `http://` and `https://` URLs.
    Http,
    /// `hf://` (Hugging Face) URLs.
    Hf,
}
160
161impl CloudType {
162    #[cfg(feature = "cloud")]
163    pub(crate) fn from_url(parsed: &Url) -> PolarsResult<Self> {
164        Ok(match parsed.scheme() {
165            "s3" | "s3a" => Self::Aws,
166            "az" | "azure" | "adl" | "abfs" | "abfss" => Self::Azure,
167            "gs" | "gcp" | "gcs" => Self::Gcp,
168            "file" => Self::File,
169            "http" | "https" => Self::Http,
170            "hf" => Self::Hf,
171            _ => polars_bail!(ComputeError: "unknown url scheme"),
172        })
173    }
174}
175
/// Parses `input` into a [`url::Url`].
///
/// * Input without `://` is treated as a local path: relative paths are joined onto
///   the current working directory and the result is converted to a `file://` URL.
/// * `http://` and `https://` URLs are parsed as given.
/// * For every other scheme, literal `%` and `?` characters are escaped to `%25` and
///   `%3F` before parsing, so they stay part of the path instead of being read as a
///   percent-escape or the start of a query string.
///
/// # Errors
/// Returns the [`url::ParseError`] produced by `Url::parse`.
///
/// # Panics
/// Panics when `input` is a relative path and the current working directory cannot
/// be determined, or when the local path cannot be represented as a `file://` URL.
#[cfg(feature = "cloud")]
pub(crate) fn parse_url(input: &str) -> std::result::Result<url::Url, url::ParseError> {
    if !input.contains("://") {
        let path = std::path::Path::new(input);

        let file_url = if path.is_relative() {
            let mut absolute = std::env::current_dir()
                .expect("the current working directory must be accessible");
            absolute.push(path);
            url::Url::from_file_path(absolute)
        } else {
            url::Url::from_file_path(path)
        };

        return Ok(file_url.expect("an absolute local path must convert to a file URL"));
    }

    if input.starts_with("http://") || input.starts_with("https://") {
        return url::Url::parse(input);
    }

    let occurrences = input
        .bytes()
        .filter(|&b| b == b'%' || b == b'?')
        .count();

    if occurrences == 0 {
        return url::Url::parse(input);
    }

    // Each escaped character grows from one byte to three.
    let mut escaped = String::with_capacity(input.len() + 2 * occurrences);
    for ch in input.chars() {
        match ch {
            '%' => escaped.push_str("%25"),
            '?' => escaped.push_str("%3F"),
            other => escaped.push(other),
        }
    }

    url::Url::parse(&escaped)
}
220
221impl FromStr for CloudType {
222    type Err = PolarsError;
223
224    #[cfg(feature = "cloud")]
225    fn from_str(url: &str) -> Result<Self, Self::Err> {
226        let parsed = parse_url(url).map_err(to_compute_err)?;
227        Self::from_url(&parsed)
228    }
229
230    #[cfg(not(feature = "cloud"))]
231    fn from_str(_s: &str) -> Result<Self, Self::Err> {
232        polars_bail!(ComputeError: "at least one of the cloud features must be enabled");
233    }
234}
/// Builds the `object_store` retry configuration: default backoff, the given
/// number of retries and a retry timeout of 10 seconds.
#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))]
fn get_retry_config(max_retries: usize) -> RetryConfig {
    let retry_timeout = std::time::Duration::from_secs(10);
    let backoff = BackoffConfig::default();

    RetryConfig {
        backoff,
        max_retries,
        retry_timeout,
    }
}
243
/// Returns the HTTP client options shared by the object-store builders in this file:
/// no request timeout, no connect timeout, and plain `http` allowed.
#[cfg(any(feature = "aws", feature = "gcp", feature = "azure", feature = "http"))]
pub(super) fn get_client_options() -> ClientOptions {
    let options = ClientOptions::new();

    // The request timeout is disabled because it isn't reset at ACK, but starts
    // from the moment we start downloading a body.
    // https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.timeout
    let options = options.with_timeout_disabled();

    // Concurrency can increase connection latency, so the connect timeout is
    // disabled as well.
    let options = options.with_connect_timeout_disabled();

    options.with_allow_http(true)
}
255
/// Best-effort fill of missing builder keys from local configuration files.
///
/// `items` pairs a file path (a leading `~` is resolved through `resolve_homedir`)
/// with a list of `(regex, key)` entries. A file is only read when at least one of
/// its keys is not yet set on `builder`; for every such key the first capture group
/// of the regex is stored as the key's value. Keys that are already set are never
/// overwritten.
///
/// Returns `None` as soon as a file cannot be opened or read as UTF-8, or a pattern
/// does not match; keys applied before that point stay applied. Returns `Some(())`
/// when every item was processed.
///
/// # Panics
/// Panics when a pattern is not a valid regular expression.
#[cfg(feature = "aws")]
fn read_config(
    builder: &mut AmazonS3Builder,
    items: &[(&Path, &[(&str, AmazonS3ConfigKey)])],
) -> Option<()> {
    use crate::path_utils::resolve_homedir;

    for (path, keys) in items {
        let any_key_missing = keys
            .iter()
            .any(|(_, key)| builder.get_config_value(key).is_none());
        if !any_key_missing {
            continue;
        }

        let mut content = String::new();
        std::fs::File::open(resolve_homedir(path))
            .ok()?
            .read_to_string(&mut content)
            .ok()?;

        for (pattern, key) in keys.iter() {
            if builder.get_config_value(key).is_some() {
                continue;
            }

            let regex = polars_utils::regex_cache::compile_regex(pattern).unwrap();
            let captures = regex.captures(content.as_str())?;
            let value = captures.get(1)?.as_str();
            // The builder API consumes `self`, so temporarily move it out of the reference.
            *builder = std::mem::take(builder).with_config(*key, value);
        }
    }

    Some(())
}
288
289impl CloudOptions {
290    /// Set the maximum number of retries.
291    pub fn with_max_retries(mut self, max_retries: usize) -> Self {
292        self.max_retries = max_retries;
293        self
294    }
295
296    #[cfg(feature = "cloud")]
297    pub fn with_credential_provider(
298        mut self,
299        credential_provider: Option<PlCredentialProvider>,
300    ) -> Self {
301        self.credential_provider = credential_provider;
302        self
303    }
304
305    /// Set the configuration for AWS connections. This is the preferred API from rust.
306    #[cfg(feature = "aws")]
307    pub fn with_aws<I: IntoIterator<Item = (AmazonS3ConfigKey, impl Into<String>)>>(
308        mut self,
309        configs: I,
310    ) -> Self {
311        self.config = Some(CloudConfig::Aws(
312            configs.into_iter().map(|(k, v)| (k, v.into())).collect(),
313        ));
314        self
315    }
316
317    /// Build the [`object_store::ObjectStore`] implementation for AWS.
318    #[cfg(feature = "aws")]
319    pub async fn build_aws(
320        &self,
321        url: &str,
322        clear_cached_credentials: bool,
323    ) -> PolarsResult<impl object_store::ObjectStore> {
324        use super::credential_provider::IntoCredentialProvider;
325
326        let opt_credential_provider =
327            self.initialized_credential_provider(clear_cached_credentials)?;
328
329        let mut builder = AmazonS3Builder::from_env()
330            .with_client_options(get_client_options())
331            .with_url(url);
332
333        if let Some(credential_provider) = &opt_credential_provider {
334            let storage_update_options = parse_untyped_config::<AmazonS3ConfigKey, _>(
335                credential_provider
336                    .storage_update_options()?
337                    .into_iter()
338                    .map(|(k, v)| (k, v.to_string())),
339            )?;
340
341            for (key, value) in storage_update_options {
342                builder = builder.with_config(key, value);
343            }
344        }
345
346        read_config(
347            &mut builder,
348            &[(
349                Path::new("~/.aws/config"),
350                &[("region\\s*=\\s*([^\r\n]*)", AmazonS3ConfigKey::Region)],
351            )],
352        );
353
354        read_config(
355            &mut builder,
356            &[(
357                Path::new("~/.aws/credentials"),
358                &[
359                    (
360                        "aws_access_key_id\\s*=\\s*([^\\r\\n]*)",
361                        AmazonS3ConfigKey::AccessKeyId,
362                    ),
363                    (
364                        "aws_secret_access_key\\s*=\\s*([^\\r\\n]*)",
365                        AmazonS3ConfigKey::SecretAccessKey,
366                    ),
367                    (
368                        "aws_session_token\\s*=\\s*([^\\r\\n]*)",
369                        AmazonS3ConfigKey::Token,
370                    ),
371                ],
372            )],
373        );
374
375        if let Some(options) = &self.config {
376            let CloudConfig::Aws(options) = options else {
377                panic!("impl error: cloud type mismatch")
378            };
379            for (key, value) in options {
380                builder = builder.with_config(*key, value);
381            }
382        }
383
384        if builder
385            .get_config_value(&AmazonS3ConfigKey::DefaultRegion)
386            .is_none()
387            && builder
388                .get_config_value(&AmazonS3ConfigKey::Region)
389                .is_none()
390        {
391            let bucket = crate::cloud::CloudLocation::new(url, false)?.bucket;
392            let region = {
393                let mut bucket_region = BUCKET_REGION.lock().unwrap();
394                bucket_region.get(bucket.as_str()).cloned()
395            };
396
397            match region {
398                Some(region) => {
399                    builder = builder.with_config(AmazonS3ConfigKey::Region, region.as_str())
400                },
401                None => {
402                    if builder
403                        .get_config_value(&AmazonS3ConfigKey::Endpoint)
404                        .is_some()
405                    {
406                        // Set a default value if the endpoint is not aws.
407                        // See: #13042
408                        builder = builder.with_config(AmazonS3ConfigKey::Region, "us-east-1");
409                    } else {
410                        polars_warn!(
411                            "'(default_)region' not set; polars will try to get it from bucket\n\nSet the region manually to silence this warning."
412                        );
413                        let result = with_concurrency_budget(1, || async {
414                            reqwest::Client::builder()
415                                .build()
416                                .unwrap()
417                                .head(format!("https://{bucket}.s3.amazonaws.com"))
418                                .send()
419                                .await
420                                .map_err(to_compute_err)
421                        })
422                        .await?;
423                        if let Some(region) = result.headers().get("x-amz-bucket-region") {
424                            let region =
425                                std::str::from_utf8(region.as_bytes()).map_err(to_compute_err)?;
426                            let mut bucket_region = BUCKET_REGION.lock().unwrap();
427                            bucket_region.insert(bucket, region.into());
428                            builder = builder.with_config(AmazonS3ConfigKey::Region, region)
429                        }
430                    }
431                },
432            };
433        };
434
435        let builder = builder.with_retry(get_retry_config(self.max_retries));
436
437        let opt_credential_provider = match opt_credential_provider {
438            #[cfg(feature = "python")]
439            Some(PlCredentialProvider::Python(object)) => {
440                if pyo3::Python::with_gil(|py| {
441                    let Ok(func_object) = object
442                        .unwrap_as_provider_ref()
443                        .getattr(py, "_can_use_as_provider")
444                    else {
445                        return PolarsResult::Ok(true);
446                    };
447
448                    Ok(func_object.call0(py)?.extract::<bool>(py).unwrap())
449                })? {
450                    Some(PlCredentialProvider::Python(object))
451                } else {
452                    None
453                }
454            },
455
456            v => v,
457        };
458
459        let builder = if let Some(credential_provider) = opt_credential_provider {
460            builder.with_credentials(credential_provider.into_aws_provider())
461        } else {
462            builder
463        };
464
465        let out = builder.build()?;
466
467        Ok(out)
468    }
469
470    /// Set the configuration for Azure connections. This is the preferred API from rust.
471    #[cfg(feature = "azure")]
472    pub fn with_azure<I: IntoIterator<Item = (AzureConfigKey, impl Into<String>)>>(
473        mut self,
474        configs: I,
475    ) -> Self {
476        self.config = Some(CloudConfig::Azure(
477            configs.into_iter().map(|(k, v)| (k, v.into())).collect(),
478        ));
479        self
480    }
481
482    /// Build the [`object_store::ObjectStore`] implementation for Azure.
483    #[cfg(feature = "azure")]
484    pub fn build_azure(
485        &self,
486        url: &str,
487        clear_cached_credentials: bool,
488    ) -> PolarsResult<impl object_store::ObjectStore> {
489        use super::credential_provider::IntoCredentialProvider;
490
491        let verbose = polars_core::config::verbose();
492
493        // The credential provider `self.credentials` is prioritized if it is set. We also need
494        // `from_env()` as it may source environment configured storage account name.
495        let mut builder =
496            MicrosoftAzureBuilder::from_env().with_client_options(get_client_options());
497
498        if let Some(options) = &self.config {
499            let CloudConfig::Azure(options) = options else {
500                panic!("impl error: cloud type mismatch")
501            };
502            for (key, value) in options.iter() {
503                builder = builder.with_config(*key, value);
504            }
505        }
506
507        let builder = builder
508            .with_url(url)
509            .with_retry(get_retry_config(self.max_retries));
510
511        let builder =
512            if let Some(v) = self.initialized_credential_provider(clear_cached_credentials)? {
513                if verbose {
514                    eprintln!(
515                        "[CloudOptions::build_azure]: Using credential provider {:?}",
516                        &v
517                    );
518                }
519                builder.with_credentials(v.into_azure_provider())
520            } else {
521                builder
522            };
523
524        let out = builder.build()?;
525
526        Ok(out)
527    }
528
529    /// Set the configuration for GCP connections. This is the preferred API from rust.
530    #[cfg(feature = "gcp")]
531    pub fn with_gcp<I: IntoIterator<Item = (GoogleConfigKey, impl Into<String>)>>(
532        mut self,
533        configs: I,
534    ) -> Self {
535        self.config = Some(CloudConfig::Gcp(
536            configs.into_iter().map(|(k, v)| (k, v.into())).collect(),
537        ));
538        self
539    }
540
541    /// Build the [`object_store::ObjectStore`] implementation for GCP.
542    #[cfg(feature = "gcp")]
543    pub fn build_gcp(
544        &self,
545        url: &str,
546        clear_cached_credentials: bool,
547    ) -> PolarsResult<impl object_store::ObjectStore> {
548        use super::credential_provider::IntoCredentialProvider;
549
550        let credential_provider = self.initialized_credential_provider(clear_cached_credentials)?;
551
552        let builder = if credential_provider.is_none() {
553            GoogleCloudStorageBuilder::from_env()
554        } else {
555            GoogleCloudStorageBuilder::new()
556        };
557
558        let mut builder = builder.with_client_options(get_client_options());
559
560        if let Some(options) = &self.config {
561            let CloudConfig::Gcp(options) = options else {
562                panic!("impl error: cloud type mismatch")
563            };
564            for (key, value) in options.iter() {
565                builder = builder.with_config(*key, value);
566            }
567        }
568
569        let builder = builder
570            .with_url(url)
571            .with_retry(get_retry_config(self.max_retries));
572
573        let builder = if let Some(v) = credential_provider {
574            builder.with_credentials(v.into_gcp_provider())
575        } else {
576            builder
577        };
578
579        let out = builder.build()?;
580
581        Ok(out)
582    }
583
584    #[cfg(feature = "http")]
585    pub fn build_http(&self, url: &str) -> PolarsResult<impl object_store::ObjectStore> {
586        let out = object_store::http::HttpBuilder::new()
587            .with_url(url)
588            .with_client_options({
589                let mut opts = super::get_client_options();
590                if let Some(CloudConfig::Http { headers }) = &self.config {
591                    opts = opts.with_default_headers(try_build_http_header_map_from_items_slice(
592                        headers.as_slice(),
593                    )?);
594                }
595                opts
596            })
597            .build()?;
598
599        Ok(out)
600    }
601
602    /// Parse a configuration from a Hashmap. This is the interface from Python.
603    #[allow(unused_variables)]
604    pub fn from_untyped_config<I: IntoIterator<Item = (impl AsRef<str>, impl Into<String>)>>(
605        url: &str,
606        config: I,
607    ) -> PolarsResult<Self> {
608        match CloudType::from_str(url)? {
609            CloudType::Aws => {
610                #[cfg(feature = "aws")]
611                {
612                    parse_untyped_config::<AmazonS3ConfigKey, _>(config)
613                        .map(|aws| Self::default().with_aws(aws))
614                }
615                #[cfg(not(feature = "aws"))]
616                {
617                    polars_bail!(ComputeError: "'aws' feature is not enabled");
618                }
619            },
620            CloudType::Azure => {
621                #[cfg(feature = "azure")]
622                {
623                    parse_untyped_config::<AzureConfigKey, _>(config)
624                        .map(|azure| Self::default().with_azure(azure))
625                }
626                #[cfg(not(feature = "azure"))]
627                {
628                    polars_bail!(ComputeError: "'azure' feature is not enabled");
629                }
630            },
631            CloudType::File => Ok(Self::default()),
632            CloudType::Http => Ok(Self::default()),
633            CloudType::Gcp => {
634                #[cfg(feature = "gcp")]
635                {
636                    parse_untyped_config::<GoogleConfigKey, _>(config)
637                        .map(|gcp| Self::default().with_gcp(gcp))
638                }
639                #[cfg(not(feature = "gcp"))]
640                {
641                    polars_bail!(ComputeError: "'gcp' feature is not enabled");
642                }
643            },
644            CloudType::Hf => {
645                #[cfg(feature = "http")]
646                {
647                    use polars_core::config;
648
649                    use crate::path_utils::resolve_homedir;
650
651                    let mut this = Self::default();
652                    let mut token = None;
653                    let verbose = config::verbose();
654
655                    for (i, (k, v)) in config.into_iter().enumerate() {
656                        let (k, v) = (k.as_ref(), v.into());
657
658                        if i == 0 && k == "token" {
659                            if verbose {
660                                eprintln!("HF token sourced from storage_options");
661                            }
662                            token = Some(v);
663                        } else {
664                            polars_bail!(ComputeError: "unknown configuration key for HF: {}", k)
665                        }
666                    }
667
668                    token = token
669                        .or_else(|| {
670                            let v = std::env::var("HF_TOKEN").ok();
671                            if v.is_some() && verbose {
672                                eprintln!("HF token sourced from HF_TOKEN env var");
673                            }
674                            v
675                        })
676                        .or_else(|| {
677                            let hf_home = std::env::var("HF_HOME");
678                            let hf_home = hf_home.as_deref();
679                            let hf_home = hf_home.unwrap_or("~/.cache/huggingface");
680                            let hf_home = resolve_homedir(&hf_home);
681                            let cached_token_path = hf_home.join("token");
682
683                            let v = std::string::String::from_utf8(
684                                std::fs::read(&cached_token_path).ok()?,
685                            )
686                            .ok()
687                            .filter(|x| !x.is_empty());
688
689                            if v.is_some() && verbose {
690                                eprintln!(
691                                    "HF token sourced from {}",
692                                    cached_token_path.to_str().unwrap()
693                                );
694                            }
695
696                            v
697                        });
698
699                    if let Some(v) = token {
700                        this.config = Some(CloudConfig::Http {
701                            headers: vec![("Authorization".into(), format!("Bearer {v}"))],
702                        })
703                    }
704
705                    Ok(this)
706                }
707                #[cfg(not(feature = "http"))]
708                {
709                    polars_bail!(ComputeError: "'http' feature is not enabled");
710                }
711            },
712        }
713    }
714
715    /// Python passes a credential provider builder that needs to be called to get the actual credential
716    /// provider.
717    #[cfg(feature = "cloud")]
718    fn initialized_credential_provider(
719        &self,
720        clear_cached_credentials: bool,
721    ) -> PolarsResult<Option<PlCredentialProvider>> {
722        if let Some(v) = self.credential_provider.clone() {
723            v.try_into_initialized(clear_cached_credentials)
724        } else {
725            Ok(None)
726        }
727    }
728}
729
#[cfg(feature = "cloud")]
#[cfg(test)]
mod tests {
    use hashbrown::HashMap;

    use super::{parse_untyped_config, parse_url};

    /// Covers the three input shapes of `parse_url`: http(s) URLs, URLs with other
    /// schemes (where `%` and `?` are escaped) and plain local paths.
    #[test]
    fn test_parse_url() {
        assert_eq!(
            parse_url(r"http://Users/Jane Doe/data.csv")
                .unwrap()
                .as_str(),
            "http://users/Jane%20Doe/data.csv"
        );
        assert_eq!(
            parse_url(r"https://Users/Jane Doe/data.csv")
                .unwrap()
                .as_str(),
            "https://users/Jane%20Doe/data.csv"
        );
        // Outside of http(s), literal `%` and `?` stay part of the path.
        assert_eq!(
            parse_url(r"s3://bucket/50%/what?.csv").unwrap().as_str(),
            "s3://bucket/50%25/what%3F.csv"
        );
        #[cfg(target_os = "windows")]
        {
            assert_eq!(
                parse_url(r"file:///c:/Users/Jane Doe/data.csv")
                    .unwrap()
                    .as_str(),
                "file:///c:/Users/Jane%20Doe/data.csv"
            );
            assert_eq!(
                parse_url(r"file://\c:\Users\Jane Doe\data.csv")
                    .unwrap()
                    .as_str(),
                "file:///c:/Users/Jane%20Doe/data.csv"
            );
            assert_eq!(
                parse_url(r"c:\Users\Jane Doe\data.csv").unwrap().as_str(),
                "file:///C:/Users/Jane%20Doe/data.csv"
            );
        }
        #[cfg(not(target_os = "windows"))]
        {
            assert_eq!(
                parse_url(r"file:///home/Jane Doe/data.csv")
                    .unwrap()
                    .as_str(),
                "file:///home/Jane%20Doe/data.csv"
            );
            assert_eq!(
                parse_url(r"/home/Jane Doe/data.csv").unwrap().as_str(),
                "file:///home/Jane%20Doe/data.csv"
            );
        }
        // Relative paths are resolved against the current directory on every platform.
        assert_eq!(
            parse_url(r"data.csv").unwrap().as_str(),
            url::Url::from_file_path(
                [
                    std::env::current_dir().unwrap().as_path(),
                    std::path::Path::new("data.csv")
                ]
                .into_iter()
                .collect::<std::path::PathBuf>()
            )
            .unwrap()
            .as_str()
        );
    }

    /// Known keys are parsed case-insensitively; unknown keys are dropped.
    #[cfg(feature = "aws")]
    #[test]
    fn test_parse_untyped_config() {
        use object_store::aws::AmazonS3ConfigKey;

        let aws_config = [
            ("aws_secret_access_key", "a_key"),
            ("aws_s3_allow_unsafe_rename", "true"),
        ]
        .into_iter()
        .collect::<HashMap<_, _>>();
        let aws_keys = parse_untyped_config::<AmazonS3ConfigKey, _>(aws_config)
            .expect("Parsing keys shouldn't have thrown an error");

        assert_eq!(
            aws_keys.first().unwrap().0,
            AmazonS3ConfigKey::SecretAccessKey
        );
        assert_eq!(aws_keys.len(), 1);

        let aws_config = [
            ("AWS_SECRET_ACCESS_KEY", "a_key"),
            ("aws_s3_allow_unsafe_rename", "true"),
        ]
        .into_iter()
        .collect::<HashMap<_, _>>();
        let aws_keys = parse_untyped_config::<AmazonS3ConfigKey, _>(aws_config)
            .expect("Parsing keys shouldn't have thrown an error");

        assert_eq!(
            aws_keys.first().unwrap().0,
            AmazonS3ConfigKey::SecretAccessKey
        );
        assert_eq!(aws_keys.len(), 1);
    }
}