1use std::borrow::{Borrow, Cow};
2use std::ffi::OsStr;
3use std::fmt::Display;
4use std::ops::{Deref, Range};
5use std::path::{Path, PathBuf};
6
7use polars_error::{PolarsResult, polars_err};
8
9use crate::format_pl_refstr;
10use crate::pl_str::PlRefStr;
11
/// Prefix (`\\?\`) that Windows puts in front of extended paths.
///
/// Paths starting with it are normalized by [`PlPath::normalize_windows_path`]
/// regardless of the platform the code runs on.
pub const WINDOWS_EXTPATH_PREFIX: &str = r"\\?\";
15
/// Borrowed, unsized path string (local path or URI with a scheme).
///
/// This is a thin wrapper around `str`. It is `#[repr(transparent)]` so that a
/// `&str` can be reinterpreted as a `&PlPath` without copying.
#[repr(transparent)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PlPath {
    /// The raw path text.
    inner: str,
}
26
27#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
28#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
29#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
30pub struct PlRefPath {
36 inner: PlRefStr,
37}
38
39impl PlPath {
40 fn _new<S: AsRef<str> + ?Sized>(s: &S) -> &PlPath {
42 let s: &str = s.as_ref();
43 unsafe { &*(s as *const str as *const PlPath) }
45 }
46
47 fn _try_from_path(path: &Path) -> PolarsResult<&PlPath> {
48 path.to_str()
49 .ok_or_else(|| polars_err!(non_utf8_path))
50 .map(Self::_new)
51 }
52
53 pub fn as_str(&self) -> &str {
54 unsafe { &*(self as *const PlPath as *const str) }
55 }
56
57 pub fn as_bytes(&self) -> &[u8] {
58 self.as_str().as_bytes()
59 }
60
61 pub fn as_os_str(&self) -> &OsStr {
62 OsStr::new(self)
63 }
64
65 pub fn as_std_path(&self) -> &Path {
66 Path::new(self)
67 }
68
69 pub fn to_ref_path(&self) -> PlRefPath {
70 PlRefPath::_new_no_normalize(self.as_str().into())
71 }
72
73 pub fn scheme(&self) -> Option<CloudScheme> {
74 CloudScheme::from_path(self.as_str())
75 }
76
77 pub fn has_scheme(&self) -> bool {
79 self.scheme().is_some()
80 }
81
82 pub fn strip_scheme(&self) -> &str {
84 &self.as_str()[self.scheme().map_or(0, |x| x.strip_scheme_index())..self.inner.len()]
85 }
86
87 pub fn file_name(&self) -> Option<&OsStr> {
88 Path::new(self.strip_scheme()).file_name()
89 }
90
91 pub fn extension(&self) -> Option<&str> {
92 Path::new(self.strip_scheme())
93 .extension()
94 .map(|x| x.to_str().unwrap())
95 }
96
97 pub fn parent(&self) -> Option<&str> {
98 Path::new(self.strip_scheme())
99 .parent()
100 .map(|x| x.to_str().unwrap())
101 }
102
103 pub fn sliced(&self, range: Range<usize>) -> &PlPath {
105 Self::_new(&self.as_str()[range])
106 }
107
108 pub fn strip_scheme_split_authority(&self) -> Option<(&'_ str, &'_ str)> {
122 match self.scheme() {
123 None | Some(CloudScheme::FileNoHostname) => Some(("", self.strip_scheme())),
124 Some(scheme) => {
125 let path_str = self.as_str();
126 let position = self.authority_end_position();
127
128 if position < path_str.len() {
129 assert!(path_str[position..].starts_with('/'));
130 }
131
132 (position < path_str.len()).then_some((
133 &path_str[scheme.strip_scheme_index()..position],
134 &path_str[position..],
135 ))
136 },
137 }
138 }
139
140 pub fn authority_end_position(&self) -> usize {
146 match self.scheme() {
147 None => 0,
148 Some(scheme @ CloudScheme::FileNoHostname) => scheme.strip_scheme_index(),
149 Some(_) => {
150 let after_scheme = self.strip_scheme();
151 let offset = self.as_str().len() - after_scheme.len();
152
153 offset + after_scheme.find('/').unwrap_or(after_scheme.len())
154 },
155 }
156 }
157
158 pub fn to_absolute_path(&self) -> PolarsResult<PlRefPath> {
159 PlRefPath::try_from_pathbuf(std::path::absolute(Path::new(self.strip_scheme()))?)
160 }
161
162 pub fn join(&self, other: impl AsRef<str>) -> PlRefPath {
163 let other = other.as_ref();
164
165 if CloudScheme::from_path(other).is_some()
166 || other.starts_with('/')
167 || other.starts_with('\\')
168 {
169 PlRefPath::new(other)
170 } else if CloudScheme::from_path(self.as_str()).is_some() {
171 let lhs = self.as_str().trim_end_matches('/');
172 PlRefPath::new(format!("{lhs}/{other}"))
173 } else {
174 PlRefPath::try_from_pathbuf(self.as_std_path().join(other)).unwrap()
175 }
176 }
177
178 pub fn normalize_windows_path(path_str: &str) -> Option<PlRefPath> {
180 let has_extpath_prefix = path_str.starts_with(WINDOWS_EXTPATH_PREFIX);
181
182 if has_extpath_prefix || cfg!(target_family = "windows") {
183 let path_str = path_str
184 .strip_prefix(WINDOWS_EXTPATH_PREFIX)
185 .unwrap_or(path_str);
186
187 if matches!(
188 CloudScheme::from_path(path_str),
189 None | Some(CloudScheme::File | CloudScheme::FileNoHostname)
190 ) && path_str.contains('\\')
191 {
192 let new_path = path_str.replace('\\', "/");
193 let inner = PlRefStr::from_string(new_path);
194 return Some(PlRefPath { inner });
195 }
196 }
197
198 None
199 }
200}
201
202impl AsRef<str> for PlPath {
203 fn as_ref(&self) -> &str {
204 self.as_str()
205 }
206}
207
208impl AsRef<OsStr> for PlPath {
209 fn as_ref(&self) -> &OsStr {
210 OsStr::new(self.as_str())
211 }
212}
213
214impl AsRef<Path> for PlPath {
215 fn as_ref(&self) -> &Path {
216 self.as_std_path()
217 }
218}
219
220impl From<&PlPath> for Box<PlPath> {
221 fn from(value: &PlPath) -> Self {
222 let s: &str = value.as_str();
223 let s: Box<str> = s.into();
224 let out: Box<PlPath> = unsafe { std::mem::transmute(s) };
226 out
227 }
228}
229
230impl Display for PlPath {
231 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
232 Display::fmt(self.as_str(), f)
233 }
234}
235
236impl PlRefPath {
237 pub fn empty() -> Self {
238 Self::default()
239 }
240
241 pub fn new(path: impl AsRef<str> + Into<PlRefStr>) -> Self {
243 if let Some(path) = PlPath::normalize_windows_path(path.as_ref()) {
244 return path;
245 }
246
247 Self::_new_no_normalize(path.into())
248 }
249
250 const fn _new_no_normalize(path: PlRefStr) -> Self {
251 Self { inner: path }
252 }
253
254 pub fn try_from_path(path: &Path) -> PolarsResult<PlRefPath> {
255 Ok(Self::new(PlPath::_try_from_path(path)?.as_str()))
256 }
257
258 pub fn try_from_pathbuf(path: PathBuf) -> PolarsResult<PlRefPath> {
259 Self::try_from_path(&path)
260 }
261
262 pub fn as_str(&self) -> &str {
263 &self.inner
264 }
265
266 pub fn as_ref_str(&self) -> &PlRefStr {
267 &self.inner
268 }
269
270 pub fn into_ref_str(self) -> PlRefStr {
271 self.inner
272 }
273
274 pub fn sliced(&self, range: Range<usize>) -> PlRefPath {
276 if range == (0..self.as_str().len()) {
277 self.clone()
278 } else {
279 Self::_new_no_normalize(PlPath::sliced(self, range).as_str().into())
280 }
281 }
282
283 pub fn to_absolute_path(&self) -> PolarsResult<Cow<'_, PlRefPath>> {
286 Ok(if self.has_scheme() || self.as_std_path().is_absolute() {
287 Cow::Borrowed(self)
288 } else {
289 Cow::Owned(PlPath::to_absolute_path(self)?)
290 })
291 }
292
293 pub fn ptr_eq(this: &Self, other: &Self) -> bool {
295 PlRefStr::ptr_eq(this.as_ref_str(), other.as_ref_str())
296 }
297}
298
299impl AsRef<str> for PlRefPath {
300 fn as_ref(&self) -> &str {
301 self.as_str()
302 }
303}
304
305impl AsRef<OsStr> for PlRefPath {
306 fn as_ref(&self) -> &OsStr {
307 self.as_os_str()
308 }
309}
310
311impl AsRef<Path> for PlRefPath {
312 fn as_ref(&self) -> &Path {
313 self.as_std_path()
314 }
315}
316
317impl Deref for PlRefPath {
318 type Target = PlPath;
319
320 fn deref(&self) -> &Self::Target {
321 PlPath::_new(self)
322 }
323}
324
325impl Display for PlRefPath {
326 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
327 Display::fmt(self.as_str(), f)
328 }
329}
330
331impl ToOwned for PlPath {
332 type Owned = PlRefPath;
333
334 fn to_owned(&self) -> Self::Owned {
335 self.to_ref_path()
336 }
337}
338
339impl Borrow<PlPath> for PlRefPath {
340 fn borrow(&self) -> &PlPath {
341 self
342 }
343}
344
345impl From<&str> for PlRefPath {
346 fn from(value: &str) -> Self {
347 Self::new(value)
348 }
349}
350
/// Generates the `CloudScheme` enum from `Variant = "name"` pairs, together with
/// the name -> variant lookup (`from_scheme_str`) and the variant -> name
/// accessor (`as_str`).
macro_rules! impl_cloud_scheme {
    ($($variant:ident = $name:literal,)+) => {
        /// Scheme of a path or URI.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
        #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
        pub enum CloudScheme {
            $($variant,)+
        }

        impl CloudScheme {
            /// Looks up a variant by its scheme name.
            ///
            /// When several variants share a name, the first one listed wins.
            // Variants sharing a name produce a duplicate (unreachable) arm.
            #[expect(unreachable_patterns)]
            fn from_scheme_str(s: &str) -> Option<Self> {
                match s {
                    $($name => Some(Self::$variant),)+
                    _ => None,
                }
            }

            /// Returns the scheme name, without the `://` separator.
            pub const fn as_str(&self) -> &'static str {
                match *self {
                    $(Self::$variant => $name,)+
                }
            }
        }
    };
}

impl_cloud_scheme! {
    Abfs = "abfs",
    Abfss = "abfss",
    Adl = "adl",
    Az = "az",
    Azure = "azure",
    // `File` must stay ahead of `FileNoHostname`: both are named "file" and the
    // name lookup resolves to the first match.
    File = "file",
    FileNoHostname = "file",
    Gcs = "gcs",
    Gs = "gs",
    Hf = "hf",
    Http = "http",
    Https = "https",
    S3 = "s3",
    S3a = "s3a",
}
396
397impl CloudScheme {
398 pub fn from_path(path: &str) -> Option<Self> {
399 if let Some(stripped) = path.strip_prefix("file:") {
400 return Some(if stripped.starts_with("//") {
401 Self::File
402 } else {
403 Self::FileNoHostname
404 });
405 }
406
407 Self::from_scheme_str(&path[..path.find("://")?])
408 }
409
410 pub fn strip_scheme_index(&self) -> usize {
413 if let Self::FileNoHostname = self {
414 5
415 } else {
416 self.as_str().len() + 3
417 }
418 }
419}
420
421impl Display for CloudScheme {
422 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
423 Display::fmt(self.as_str(), f)
424 }
425}
426
427pub fn format_file_uri(absolute_local_path: &str) -> PlRefPath {
432 if cfg!(target_family = "windows") || absolute_local_path.starts_with(WINDOWS_EXTPATH_PREFIX) {
442 if let Some(path) = PlPath::normalize_windows_path(absolute_local_path) {
443 PlRefPath::new(format_pl_refstr!("file:///{path}"))
444 } else {
445 PlRefPath::new(format_pl_refstr!("file:///{absolute_local_path}"))
446 }
447 } else {
448 PlRefPath::new(format_pl_refstr!("file://{absolute_local_path}"))
449 }
450}
451
#[cfg(test)]
mod tests {
    use super::*;

    /// Scheme detection, scheme stripping and authority splitting for `file:`
    /// URIs, plus normalization of a `\\?\`-prefixed Windows path.
    #[test]
    fn test_plpath_file() {
        // `file://` form with an empty authority.
        let p = PlRefPath::new("file:///home/user");
        assert_eq!(p.scheme(), Some(CloudScheme::File));
        assert_eq!(p.scheme().map(|x| x.as_str()), Some("file"));
        assert_eq!(p.as_str(), "file:///home/user");
        assert_eq!(p.strip_scheme(), "/home/user");

        // `file:` form without `//`.
        let p = PlRefPath::new("file:/home/user");
        assert_eq!(p.scheme(), Some(CloudScheme::FileNoHostname));
        assert_eq!(p.scheme().map(|x| x.as_str()), Some("file"));
        assert_eq!(p.as_str(), "file:/home/user");
        assert_eq!(p.strip_scheme(), "/home/user");

        let scheme_cases = [
            ("file://", CloudScheme::File),
            ("file:", CloudScheme::FileNoHostname),
            ("file:/", CloudScheme::FileNoHostname),
        ];

        for (uri, expect) in scheme_cases {
            assert_eq!(PlRefPath::new(uri).scheme(), Some(expect), "scheme of {uri:?}");
        }

        let authority_cases: [(&str, Option<(&str, &str)>); 6] = [
            ("file://", None),
            ("file:///", Some(("", "/"))),
            ("file:///path", Some(("", "/path"))),
            ("file://hostname:80/path", Some(("hostname:80", "/path"))),
            ("file:", Some(("", ""))),
            ("file:/Local/path", Some(("", "/Local/path"))),
        ];

        for (uri, expect) in authority_cases {
            let p = PlRefPath::new(uri);
            assert_eq!(
                p.strip_scheme_split_authority(),
                expect,
                "authority split of {uri:?}"
            );
        }

        // The extended-path prefix is removed and backslashes are converted.
        assert_eq!(
            PlRefPath::new(r#"\\?\C:\Windows\system32"#).as_str(),
            "C:/Windows/system32"
        );
    }

    /// Joining onto cloud URIs, local paths and `file://` URIs.
    #[test]
    fn test_plpath_join() {
        // A right-hand side with its own scheme replaces the base.
        assert_eq!(
            PlRefPath::new("s3://.../...").join("az://.../...").as_str(),
            "az://.../..."
        );

        // Relative right-hand sides are appended with a single `/`, regardless of
        // trailing slashes on the base.
        for base in ["s3://.../...", "s3://.../...//"] {
            assert_eq!(
                PlRefPath::new(base)
                    .join("a=1/b=1/00000000.parquet")
                    .as_str(),
                "s3://.../.../a=1/b=1/00000000.parquet"
            );
        }

        /// Checks `base.join(added)` both for the plain local path and for its
        /// `file://` URI form, using the platform's main separator in the inputs.
        fn check_join(base: &str, added: &str, expect: &str) {
            use std::path::MAIN_SEPARATOR_STR as SEP;

            let expect = PlRefPath::new(expect);
            let base = base.replace('/', SEP);
            let added = added.replace('/', SEP);

            assert_eq!(PlRefPath::new(&base).join(&added), expect);

            let uri_base = format_file_uri(&base);
            // A rooted right-hand side replaces the base, so no URI is expected.
            let expect_uri = if added.starts_with(SEP) {
                expect.clone()
            } else {
                format_file_uri(expect.as_str())
            };

            assert_eq!(PlRefPath::new(uri_base.as_str()).join(added), expect_uri);
        }

        // (base, added, expected)
        let cases = [
            ("a/b/c/", "d/e", "a/b/c/d/e"),
            ("a/b/c", "d/e", "a/b/c/d/e"),
            ("a/b/c", "d/e/", "a/b/c/d/e/"),
            ("a/b/c", "/d", "/d"),
            ("a/b/c", "/d/", "/d/"),
            ("", "/d/", "/d/"),
            ("/", "/d/", "/d/"),
            ("/x/y", "/d/", "/d/"),
            ("/x/y", "/d", "/d"),
            ("/x/y", "d", "/x/y/d"),
            ("/a/longer", "path", "/a/longer/path"),
            ("/a/longer", "/path", "/path"),
            ("/a/longer", "path/test", "/a/longer/path/test"),
            ("/a/longer", "/path/test", "/path/test"),
        ];

        for (base, added, expect) in cases {
            check_join(base, added, expect);
        }
    }

    /// `file_name` works on the scheme-stripped path.
    #[test]
    fn test_plpath_name() {
        let cases: [(&str, Option<&str>); 5] = [
            ("s3://...", Some("...")),
            ("a/b/file.parquet", Some("file.parquet")),
            ("file.parquet", Some("file.parquet")),
            ("s3://", None),
            ("", None),
        ];

        for (input, expect) in cases {
            let p = PlRefPath::new(input);
            assert_eq!(
                p.file_name().and_then(|name| name.to_str()),
                expect,
                "file name of {input:?}"
            );
        }
    }
}