1use std::borrow::{Borrow, Cow};
2use std::ffi::OsStr;
3use std::fmt::Display;
4use std::ops::{Deref, Range};
5use std::path::{Path, PathBuf};
6
7use polars_error::{PolarsResult, polars_err};
8
9use crate::format_pl_refstr;
10use crate::pl_str::PlRefStr;
11
/// The `\\?\` prefix that marks a Windows extended-length path.
///
/// Paths carrying this prefix have it stripped by
/// `PlPath::normalize_windows_path` when they are normalized.
pub const WINDOWS_EXTPATH_PREFIX: &str = r"\\?\";
15
/// Borrowed, unsized path string.
///
/// The type is `#[repr(transparent)]` over `str`, which is what allows a `&str`
/// to be reinterpreted as a `&PlPath` (and back) with a plain pointer cast.
/// The owned counterpart is [`PlRefPath`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct PlPath {
    // The path text; may or may not start with a scheme such as `s3://`.
    inner: str,
}
26
/// Owned path string backed by a [`PlRefStr`]; dereferences to [`PlPath`].
///
/// Paths built through `PlRefPath::new` are first passed through
/// `PlPath::normalize_windows_path`.
// NOTE(review): cloning is presumably cheap (shared storage, cf. `ptr_eq`) —
// confirm against `PlRefStr`.
#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PlRefPath {
    // Backing string storage.
    inner: PlRefStr,
}
39
40impl PlPath {
41 fn _new<S: AsRef<str> + ?Sized>(s: &S) -> &PlPath {
43 let s: &str = s.as_ref();
44 unsafe { &*(s as *const str as *const PlPath) }
46 }
47
48 fn _try_from_path(path: &Path) -> PolarsResult<&PlPath> {
49 path.to_str()
50 .ok_or_else(|| polars_err!(non_utf8_path))
51 .map(Self::_new)
52 }
53
54 pub fn as_str(&self) -> &str {
55 unsafe { &*(self as *const PlPath as *const str) }
56 }
57
58 pub fn as_bytes(&self) -> &[u8] {
59 self.as_str().as_bytes()
60 }
61
62 pub fn as_os_str(&self) -> &OsStr {
63 OsStr::new(self)
64 }
65
66 pub fn as_std_path(&self) -> &Path {
67 Path::new(self)
68 }
69
70 pub fn to_ref_path(&self) -> PlRefPath {
71 PlRefPath::_new_no_normalize(self.as_str().into())
72 }
73
74 pub fn scheme(&self) -> Option<CloudScheme> {
75 CloudScheme::from_path(self.as_str())
76 }
77
78 pub fn has_scheme(&self) -> bool {
80 self.scheme().is_some()
81 }
82
83 pub fn strip_scheme(&self) -> &str {
85 &self.as_str()[self.scheme().map_or(0, |x| x.strip_scheme_index())..self.inner.len()]
86 }
87
88 pub fn file_name(&self) -> Option<&OsStr> {
89 Path::new(self.strip_scheme()).file_name()
90 }
91
92 pub fn extension(&self) -> Option<&str> {
93 Path::new(self.strip_scheme())
94 .extension()
95 .map(|x| x.to_str().unwrap())
96 }
97
98 pub fn parent(&self) -> Option<&str> {
99 Path::new(self.strip_scheme())
100 .parent()
101 .map(|x| x.to_str().unwrap())
102 }
103
104 pub fn sliced(&self, range: Range<usize>) -> &PlPath {
106 Self::_new(&self.as_str()[range])
107 }
108
109 pub fn strip_scheme_split_authority(&self) -> Option<(&'_ str, &'_ str)> {
123 match self.scheme() {
124 None | Some(CloudScheme::FileNoHostname) => Some(("", self.strip_scheme())),
125 Some(scheme) => {
126 let path_str = self.as_str();
127 let position = self.authority_end_position();
128
129 if position < path_str.len() {
130 assert!(path_str[position..].starts_with('/'));
131 }
132
133 (position < path_str.len()).then_some((
134 &path_str[scheme.strip_scheme_index()..position],
135 &path_str[position..],
136 ))
137 },
138 }
139 }
140
141 pub fn authority_end_position(&self) -> usize {
147 match self.scheme() {
148 None => 0,
149 Some(scheme @ CloudScheme::FileNoHostname) => scheme.strip_scheme_index(),
150 Some(_) => {
151 let after_scheme = self.strip_scheme();
152 let offset = self.as_str().len() - after_scheme.len();
153
154 offset + after_scheme.find('/').unwrap_or(after_scheme.len())
155 },
156 }
157 }
158
159 pub fn to_absolute_path(&self) -> PolarsResult<PlRefPath> {
160 PlRefPath::try_from_pathbuf(std::path::absolute(Path::new(self.strip_scheme()))?)
161 }
162
163 pub fn join(&self, other: impl AsRef<str>) -> PlRefPath {
164 let other = other.as_ref();
165
166 if CloudScheme::from_path(other).is_some() {
167 PlRefPath::new(other)
168 } else {
169 PlRefPath::try_from_pathbuf(self.as_std_path().join(other)).unwrap()
170 }
171 }
172
173 pub fn normalize_windows_path(path_str: &str) -> Option<PlRefPath> {
175 let has_extpath_prefix = path_str.starts_with(WINDOWS_EXTPATH_PREFIX);
176
177 if has_extpath_prefix || cfg!(target_family = "windows") {
178 let path_str = path_str
179 .strip_prefix(WINDOWS_EXTPATH_PREFIX)
180 .unwrap_or(path_str);
181
182 if matches!(
183 CloudScheme::from_path(path_str),
184 None | Some(CloudScheme::File | CloudScheme::FileNoHostname)
185 ) && path_str.contains('\\')
186 {
187 let new_path = path_str.replace('\\', "/");
188 let inner = PlRefStr::from_string(new_path);
189 return Some(PlRefPath { inner });
190 }
191 }
192
193 None
194 }
195}
196
197impl AsRef<str> for PlPath {
198 fn as_ref(&self) -> &str {
199 self.as_str()
200 }
201}
202
203impl AsRef<OsStr> for PlPath {
204 fn as_ref(&self) -> &OsStr {
205 OsStr::new(self.as_str())
206 }
207}
208
209impl AsRef<Path> for PlPath {
210 fn as_ref(&self) -> &Path {
211 self.as_std_path()
212 }
213}
214
215impl From<&PlPath> for Box<PlPath> {
216 fn from(value: &PlPath) -> Self {
217 let s: &str = value.as_str();
218 let s: Box<str> = s.into();
219 let out: Box<PlPath> = unsafe { std::mem::transmute(s) };
221 out
222 }
223}
224
225impl Display for PlPath {
226 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
227 Display::fmt(self.as_str(), f)
228 }
229}
230
231impl PlRefPath {
232 pub fn empty() -> Self {
233 Self::default()
234 }
235
236 pub fn new(path: impl AsRef<str> + Into<PlRefStr>) -> Self {
238 if let Some(path) = PlPath::normalize_windows_path(path.as_ref()) {
239 return path;
240 }
241
242 Self::_new_no_normalize(path.into())
243 }
244
245 const fn _new_no_normalize(path: PlRefStr) -> Self {
246 Self { inner: path }
247 }
248
249 pub fn try_from_path(path: &Path) -> PolarsResult<PlRefPath> {
250 Ok(Self::new(PlPath::_try_from_path(path)?.as_str()))
251 }
252
253 pub fn try_from_pathbuf(path: PathBuf) -> PolarsResult<PlRefPath> {
254 Self::try_from_path(&path)
255 }
256
257 pub fn as_str(&self) -> &str {
258 &self.inner
259 }
260
261 pub fn as_ref_str(&self) -> &PlRefStr {
262 &self.inner
263 }
264
265 pub fn into_ref_str(self) -> PlRefStr {
266 self.inner
267 }
268
269 pub fn sliced(&self, range: Range<usize>) -> PlRefPath {
271 if range == (0..self.as_str().len()) {
272 self.clone()
273 } else {
274 Self::_new_no_normalize(PlPath::sliced(self, range).as_str().into())
275 }
276 }
277
278 pub fn to_absolute_path(&self) -> PolarsResult<Cow<'_, PlRefPath>> {
281 Ok(if self.has_scheme() || self.as_std_path().is_absolute() {
282 Cow::Borrowed(self)
283 } else {
284 Cow::Owned(PlPath::to_absolute_path(self)?)
285 })
286 }
287
288 pub fn ptr_eq(this: &Self, other: &Self) -> bool {
290 PlRefStr::ptr_eq(this.as_ref_str(), other.as_ref_str())
291 }
292}
293
294impl AsRef<str> for PlRefPath {
295 fn as_ref(&self) -> &str {
296 self.as_str()
297 }
298}
299
300impl AsRef<OsStr> for PlRefPath {
301 fn as_ref(&self) -> &OsStr {
302 self.as_os_str()
303 }
304}
305
306impl AsRef<Path> for PlRefPath {
307 fn as_ref(&self) -> &Path {
308 self.as_std_path()
309 }
310}
311
312impl Deref for PlRefPath {
313 type Target = PlPath;
314
315 fn deref(&self) -> &Self::Target {
316 PlPath::_new(self)
317 }
318}
319
320impl Display for PlRefPath {
321 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
322 Display::fmt(self.as_str(), f)
323 }
324}
325
326impl ToOwned for PlPath {
327 type Owned = PlRefPath;
328
329 fn to_owned(&self) -> Self::Owned {
330 self.to_ref_path()
331 }
332}
333
334impl Borrow<PlPath> for PlRefPath {
335 fn borrow(&self) -> &PlPath {
336 self
337 }
338}
339
340impl From<&str> for PlRefPath {
341 fn from(value: &str) -> Self {
342 Self::new(value)
343 }
344}
345
/// Generates the [`CloudScheme`] enum from `Variant = "scheme"` pairs, together
/// with the string <-> variant conversions.
macro_rules! impl_cloud_scheme {
    ($($t:ident = $n:literal,)+) => {
        /// Scheme recognised at the start of a path.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
        #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
        pub enum CloudScheme {
            $($t,)+
        }

        impl CloudScheme {
            /// Maps a bare scheme name (without `://`) to its variant.
            ///
            /// When several variants share a name, the first one listed wins.
            // Variants that share a name produce duplicate match arms; the
            // later ones are unreachable by design.
            #[expect(unreachable_patterns)]
            fn from_scheme_str(s: &str) -> Option<Self> {
                Some(match s {
                    $($n => Self::$t,)+
                    _ => return None,
                })
            }

            /// Returns the scheme name, without the `://` separator.
            pub const fn as_str(&self) -> &'static str {
                match self {
                    $(Self::$t => $n,)+
                }
            }
        }
    };
}

/// Number of leading bytes of a path inspected when looking for `scheme://`:
/// the longest scheme name below (5 bytes) plus the 3-byte `://` separator.
const MAX_SCHEME_LEN: usize = 8;
impl_cloud_scheme! {
    Abfs = "abfs",
    Abfss = "abfss",
    Adl = "adl",
    Az = "az",
    Azure = "azure",
    File = "file",
    FileNoHostname = "file",
    Gcs = "gcs",
    Gs = "gs",
    Hf = "hf",
    Http = "http",
    Https = "https",
    S3 = "s3",
    S3a = "s3a",
}

impl CloudScheme {
    /// Detects the scheme a path starts with.
    ///
    /// Anything starting with `file:` is a file scheme: [`CloudScheme::File`]
    /// when `//` follows, [`CloudScheme::FileNoHostname`] otherwise. Every
    /// other scheme must be followed by `://`. Returns `None` when the path
    /// has no recognised scheme.
    ///
    /// Never panics, including for paths that start with multi-byte UTF-8
    /// characters.
    pub fn from_path(path: &str) -> Option<Self> {
        if let Some(stripped) = path.strip_prefix("file:") {
            return Some(if stripped.starts_with("//") {
                Self::File
            } else {
                Self::FileNoHostname
            });
        }

        // Only the first `MAX_SCHEME_LEN` bytes can hold `scheme://`. The cut
        // must land on a char boundary, otherwise slicing a path such as a
        // non-ASCII local file name would panic. Backing off only drops a
        // partial multi-byte character, never part of an ASCII `scheme://`.
        let mut end = path.len().min(MAX_SCHEME_LEN);
        while !path.is_char_boundary(end) {
            end -= 1;
        }
        let head = &path[..end];

        let scheme_len = head.find("://")?;
        Self::from_scheme_str(&head[..scheme_len])
    }

    /// Returns the byte length of the scheme prefix to strip from a path:
    /// `file:` (5) for [`CloudScheme::FileNoHostname`], otherwise the scheme
    /// name plus `://`.
    pub fn strip_scheme_index(&self) -> usize {
        if let Self::FileNoHostname = self {
            5
        } else {
            self.as_str().len() + 3
        }
    }
}
421
422impl Display for CloudScheme {
423 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
424 Display::fmt(self.as_str(), f)
425 }
426}
427
428pub fn format_file_uri(absolute_local_path: &str) -> PlRefPath {
433 if cfg!(target_family = "windows") || absolute_local_path.starts_with(WINDOWS_EXTPATH_PREFIX) {
443 if let Some(path) = PlPath::normalize_windows_path(absolute_local_path) {
444 PlRefPath::new(format_pl_refstr!("file:///{path}"))
445 } else {
446 PlRefPath::new(format_pl_refstr!("file:///{absolute_local_path}"))
447 }
448 } else {
449 PlRefPath::new(format_pl_refstr!("file://{absolute_local_path}"))
450 }
451}
452
/// Serde support for [`PlRefPath`], routed through the legacy `PlPath` type so
/// both share one serialized representation.
#[cfg(feature = "serde")]
mod _serde_impl {
    use serde::{Deserialize, Serialize};

    use super::super::plpath::PlPath as LegacyPlPath;
    use crate::pl_path::PlRefPath;

    impl Serialize for PlRefPath {
        /// Converts a clone of `self` to the legacy type and serializes that.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            let legacy: LegacyPlPath = self.clone().into();
            LegacyPlPath::serialize(&legacy, serializer)
        }
    }

    impl<'de> Deserialize<'de> for PlRefPath {
        /// Deserializes the legacy type and converts it into a `PlRefPath`.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let legacy = LegacyPlPath::deserialize(deserializer)?;
            Ok(legacy.into())
        }
    }
}
478
479#[cfg(feature = "dsl-schema")]
480use super::plpath::PlPath as LegacyPlPath;
481
/// JSON-schema support for [`PlRefPath`]: every method forwards to the legacy
/// `PlPath` type, so both types report the same schema name, id and body.
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for PlRefPath {
    /// Returns the legacy type's schema name.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        LegacyPlPath::schema_name()
    }

    /// Returns the legacy type's schema id.
    fn schema_id() -> std::borrow::Cow<'static, str> {
        LegacyPlPath::schema_id()
    }

    /// Returns the legacy type's JSON schema.
    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        LegacyPlPath::json_schema(generator)
    }
}
496
/// Unit tests for scheme detection, authority splitting, joining and
/// file-name extraction.
#[cfg(test)]
mod tests {
    use super::*;

    /// Covers both `file:` forms, authority splitting, and extended-path
    /// normalization.
    #[test]
    fn test_plpath_file() {
        // `file://` form: the scheme prefix is 7 bytes long.
        let p = PlRefPath::new("file:///home/user");
        assert_eq!(
            (
                p.scheme(),
                p.scheme().map(|x| x.as_str()),
                p.as_str(),
                p.strip_scheme(),
            ),
            (
                Some(CloudScheme::File),
                Some("file"),
                "file:///home/user",
                "/home/user"
            )
        );

        // `file:` form without `//`: only `file:` is stripped.
        let p = PlRefPath::new("file:/home/user");
        assert_eq!(
            (
                p.scheme(),
                p.scheme().map(|x| x.as_str()),
                p.as_str(),
                p.strip_scheme(),
            ),
            (
                Some(CloudScheme::FileNoHostname),
                Some("file"),
                "file:/home/user",
                "/home/user"
            )
        );

        assert_eq!(PlRefPath::new("file://").scheme(), Some(CloudScheme::File));

        // Nothing follows the (empty) authority, so there is no split.
        assert_eq!(
            PlRefPath::new("file://").strip_scheme_split_authority(),
            None
        );

        assert_eq!(
            PlRefPath::new("file:///").strip_scheme_split_authority(),
            Some(("", "/"))
        );

        assert_eq!(
            PlRefPath::new("file:///path").strip_scheme_split_authority(),
            Some(("", "/path"))
        );

        assert_eq!(
            PlRefPath::new("file://hostname:80/path").strip_scheme_split_authority(),
            Some(("hostname:80", "/path"))
        );

        assert_eq!(
            PlRefPath::new("file:").scheme(),
            Some(CloudScheme::FileNoHostname)
        );
        assert_eq!(
            PlRefPath::new("file:/").scheme(),
            Some(CloudScheme::FileNoHostname)
        );
        // The hostname-less form always yields an empty authority.
        assert_eq!(
            PlRefPath::new("file:").strip_scheme_split_authority(),
            Some(("", ""))
        );
        assert_eq!(
            PlRefPath::new("file:/Local/path").strip_scheme_split_authority(),
            Some(("", "/Local/path"))
        );

        // The extended-path prefix is stripped and backslashes become `/`,
        // regardless of the target platform.
        assert_eq!(
            PlRefPath::new(r#"\\?\C:\Windows\system32"#).as_str(),
            "C:/Windows/system32"
        );
    }

    /// Covers `join` for scheme replacement, plain local paths and `file://`
    /// URIs.
    #[test]
    fn test_plpath_join() {
        // A joined path that has its own scheme replaces the base entirely.
        assert_eq!(
            PlRefPath::new("s3://.../...").join("az://.../...").as_str(),
            "az://.../..."
        );

        // Checks `base + added == expect` for the plain path and for the same
        // base expressed as a `file://` URI.
        fn _assert_plpath_join(base: &str, added: &str, expect: &str) {
            let expect = PlRefPath::new(expect);
            // Use the platform separator in the inputs.
            let base = base.replace('/', std::path::MAIN_SEPARATOR_STR);
            let added = added.replace('/', std::path::MAIN_SEPARATOR_STR);

            assert_eq!(PlRefPath::new(&base).join(&added), expect);

            let uri_base = format_file_uri(&base);
            // A joined path starting with the separator replaces the base, so
            // the expected value carries no `file://` prefix in that case.
            let expect_uri = if added.starts_with(std::path::MAIN_SEPARATOR_STR) {
                expect.clone()
            } else {
                format_file_uri(expect.as_str())
            };

            assert_eq!(PlRefPath::new(uri_base.as_str()).join(added), expect_uri);
        }

        // Readable `base + added => expect` syntax for the helper above.
        macro_rules! assert_plpath_join {
            ($base:literal + $added:literal => $expect:literal) => {
                _assert_plpath_join($base, $added, $expect)
            };
        }

        assert_plpath_join!("a/b/c/" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e/" => "a/b/c/d/e/");
        assert_plpath_join!("a/b/c" + "/d" => "/d");
        assert_plpath_join!("a/b/c" + "/d/" => "/d/");
        assert_plpath_join!("" + "/d/" => "/d/");
        assert_plpath_join!("/" + "/d/" => "/d/");
        assert_plpath_join!("/x/y" + "/d/" => "/d/");
        assert_plpath_join!("/x/y" + "/d" => "/d");
        assert_plpath_join!("/x/y" + "d" => "/x/y/d");

        assert_plpath_join!("/a/longer" + "path" => "/a/longer/path");
        assert_plpath_join!("/a/longer" + "/path" => "/path");
        assert_plpath_join!("/a/longer" + "path/test" => "/a/longer/path/test");
        assert_plpath_join!("/a/longer" + "/path/test" => "/path/test");
    }

    /// Covers `file_name` with and without a scheme, and for empty paths.
    #[test]
    fn test_plpath_name() {
        // The file name is taken from the text after the scheme.
        assert_eq!(PlRefPath::new("s3://...").file_name(), Some("...".as_ref()));
        assert_eq!(
            PlRefPath::new("a/b/file.parquet").file_name(),
            Some("file.parquet".as_ref())
        );
        assert_eq!(
            PlRefPath::new("file.parquet").file_name(),
            Some("file.parquet".as_ref())
        );

        assert_eq!(PlRefPath::new("s3://").file_name(), None);
        assert_eq!(PlRefPath::new("").file_name(), None);
    }
}