1use std::borrow::{Borrow, Cow};
2use std::ffi::OsStr;
3use std::fmt::Display;
4use std::ops::{Deref, Range};
5use std::path::{Path, PathBuf};
6
7use polars_error::{PolarsResult, polars_err};
8
9use crate::format_pl_refstr;
10use crate::pl_str::PlRefStr;
11
/// Prefix (`\\?\`) that may appear at the start of Windows paths.
///
/// `PlPath::normalize_windows_path` strips it before converting backslashes.
pub const WINDOWS_EXTPATH_PREFIX: &str = r"\\?\";
15
/// Borrowed path stored as a UTF-8 string slice.
///
/// Equality, ordering and hashing are derived, i.e. they operate on the raw
/// string value rather than on any path-aware comparison.
///
/// The type is `#[repr(transparent)]` over `str`; the pointer casts in
/// `PlPath::_new` and `PlPath::as_str` depend on that layout guarantee.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct PlPath {
    // The path text itself; unsized, so `PlPath` is only used behind a pointer.
    inner: str,
}
26
/// Owned counterpart of [`PlPath`], backed by a [`PlRefStr`].
///
/// Dereferences to [`PlPath`], so every `PlPath` method is available on it.
/// Paths built through [`PlRefPath::new`] are first passed through
/// `PlPath::normalize_windows_path`.
// NOTE(review): `PlRefStr` is presumably a cheaply clonable shared string
// (see `PlRefPath::ptr_eq`) — confirm against its definition.
#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct PlRefPath {
    // The path text.
    inner: PlRefStr,
}
38
39impl PlPath {
40 fn _new<S: AsRef<str> + ?Sized>(s: &S) -> &PlPath {
42 let s: &str = s.as_ref();
43 unsafe { &*(s as *const str as *const PlPath) }
45 }
46
47 fn _try_from_path(path: &Path) -> PolarsResult<&PlPath> {
48 path.to_str()
49 .ok_or_else(|| polars_err!(non_utf8_path))
50 .map(Self::_new)
51 }
52
53 pub fn as_str(&self) -> &str {
54 unsafe { &*(self as *const PlPath as *const str) }
55 }
56
57 pub fn as_bytes(&self) -> &[u8] {
58 self.as_str().as_bytes()
59 }
60
61 pub fn as_os_str(&self) -> &OsStr {
62 OsStr::new(self)
63 }
64
65 pub fn as_std_path(&self) -> &Path {
66 Path::new(self)
67 }
68
69 pub fn to_ref_path(&self) -> PlRefPath {
70 PlRefPath::_new_no_normalize(self.as_str().into())
71 }
72
73 pub fn scheme(&self) -> Option<CloudScheme> {
74 CloudScheme::from_path(self.as_str())
75 }
76
77 pub fn has_scheme(&self) -> bool {
79 self.scheme().is_some()
80 }
81
82 pub fn strip_scheme(&self) -> &str {
84 &self.as_str()[self.scheme().map_or(0, |x| x.strip_scheme_index())..self.inner.len()]
85 }
86
87 pub fn file_name(&self) -> Option<&OsStr> {
88 Path::new(self.strip_scheme()).file_name()
89 }
90
91 pub fn extension(&self) -> Option<&str> {
92 Path::new(self.strip_scheme())
93 .extension()
94 .map(|x| x.to_str().unwrap())
95 }
96
97 pub fn parent(&self) -> Option<&str> {
98 Path::new(self.strip_scheme())
99 .parent()
100 .map(|x| x.to_str().unwrap())
101 }
102
103 pub fn sliced(&self, range: Range<usize>) -> &PlPath {
105 Self::_new(&self.as_str()[range])
106 }
107
108 pub fn strip_scheme_split_authority(&self) -> Option<(&'_ str, &'_ str)> {
122 match self.scheme() {
123 None | Some(CloudScheme::FileNoHostname) => Some(("", self.strip_scheme())),
124 Some(scheme) => {
125 let path_str = self.as_str();
126 let position = self.authority_end_position();
127
128 if position < path_str.len() {
129 assert!(path_str[position..].starts_with('/'));
130 }
131
132 (position < path_str.len()).then_some((
133 &path_str[scheme.strip_scheme_index()..position],
134 &path_str[position..],
135 ))
136 },
137 }
138 }
139
140 pub fn authority_end_position(&self) -> usize {
146 match self.scheme() {
147 None => 0,
148 Some(scheme @ CloudScheme::FileNoHostname) => scheme.strip_scheme_index(),
149 Some(_) => {
150 let after_scheme = self.strip_scheme();
151 let offset = self.as_str().len() - after_scheme.len();
152
153 offset + after_scheme.find('/').unwrap_or(after_scheme.len())
154 },
155 }
156 }
157
158 pub fn to_absolute_path(&self) -> PolarsResult<PlRefPath> {
159 PlRefPath::try_from_pathbuf(std::path::absolute(Path::new(self.strip_scheme()))?)
160 }
161
162 pub fn join(&self, other: impl AsRef<str>) -> PlRefPath {
163 let other = other.as_ref();
164
165 if CloudScheme::from_path(other).is_some() {
166 PlRefPath::new(other)
167 } else {
168 PlRefPath::try_from_pathbuf(self.as_std_path().join(other)).unwrap()
169 }
170 }
171
172 pub fn normalize_windows_path(path_str: &str) -> Option<PlRefPath> {
174 let has_extpath_prefix = path_str.starts_with(WINDOWS_EXTPATH_PREFIX);
175
176 if has_extpath_prefix || cfg!(target_family = "windows") {
177 let path_str = path_str
178 .strip_prefix(WINDOWS_EXTPATH_PREFIX)
179 .unwrap_or(path_str);
180
181 if matches!(
182 CloudScheme::from_path(path_str),
183 None | Some(CloudScheme::File | CloudScheme::FileNoHostname)
184 ) && path_str.contains('\\')
185 {
186 let new_path = path_str.replace('\\', "/");
187 let inner = PlRefStr::from_string(new_path);
188 return Some(PlRefPath { inner });
189 }
190 }
191
192 None
193 }
194}
195
196impl AsRef<str> for PlPath {
197 fn as_ref(&self) -> &str {
198 self.as_str()
199 }
200}
201
202impl AsRef<OsStr> for PlPath {
203 fn as_ref(&self) -> &OsStr {
204 OsStr::new(self.as_str())
205 }
206}
207
208impl AsRef<Path> for PlPath {
209 fn as_ref(&self) -> &Path {
210 self.as_std_path()
211 }
212}
213
214impl From<&PlPath> for Box<PlPath> {
215 fn from(value: &PlPath) -> Self {
216 let s: &str = value.as_str();
217 let s: Box<str> = s.into();
218 let out: Box<PlPath> = unsafe { std::mem::transmute(s) };
220 out
221 }
222}
223
224impl Display for PlPath {
225 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
226 Display::fmt(self.as_str(), f)
227 }
228}
229
230impl PlRefPath {
231 pub fn empty() -> Self {
232 Self::default()
233 }
234
235 pub fn new(path: impl AsRef<str> + Into<PlRefStr>) -> Self {
237 if let Some(path) = PlPath::normalize_windows_path(path.as_ref()) {
238 return path;
239 }
240
241 Self::_new_no_normalize(path.into())
242 }
243
244 const fn _new_no_normalize(path: PlRefStr) -> Self {
245 Self { inner: path }
246 }
247
248 pub fn try_from_path(path: &Path) -> PolarsResult<PlRefPath> {
249 Ok(Self::new(PlPath::_try_from_path(path)?.as_str()))
250 }
251
252 pub fn try_from_pathbuf(path: PathBuf) -> PolarsResult<PlRefPath> {
253 Self::try_from_path(&path)
254 }
255
256 pub fn as_str(&self) -> &str {
257 &self.inner
258 }
259
260 pub fn as_ref_str(&self) -> &PlRefStr {
261 &self.inner
262 }
263
264 pub fn into_ref_str(self) -> PlRefStr {
265 self.inner
266 }
267
268 pub fn sliced(&self, range: Range<usize>) -> PlRefPath {
270 if range == (0..self.as_str().len()) {
271 self.clone()
272 } else {
273 Self::_new_no_normalize(PlPath::sliced(self, range).as_str().into())
274 }
275 }
276
277 pub fn to_absolute_path(&self) -> PolarsResult<Cow<'_, PlRefPath>> {
280 Ok(if self.has_scheme() || self.as_std_path().is_absolute() {
281 Cow::Borrowed(self)
282 } else {
283 Cow::Owned(PlPath::to_absolute_path(self)?)
284 })
285 }
286
287 pub fn ptr_eq(this: &Self, other: &Self) -> bool {
289 PlRefStr::ptr_eq(this.as_ref_str(), other.as_ref_str())
290 }
291}
292
293impl AsRef<str> for PlRefPath {
294 fn as_ref(&self) -> &str {
295 self.as_str()
296 }
297}
298
299impl AsRef<OsStr> for PlRefPath {
300 fn as_ref(&self) -> &OsStr {
301 self.as_os_str()
302 }
303}
304
305impl AsRef<Path> for PlRefPath {
306 fn as_ref(&self) -> &Path {
307 self.as_std_path()
308 }
309}
310
311impl Deref for PlRefPath {
312 type Target = PlPath;
313
314 fn deref(&self) -> &Self::Target {
315 PlPath::_new(self)
316 }
317}
318
319impl Display for PlRefPath {
320 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
321 Display::fmt(self.as_str(), f)
322 }
323}
324
325impl ToOwned for PlPath {
326 type Owned = PlRefPath;
327
328 fn to_owned(&self) -> Self::Owned {
329 self.to_ref_path()
330 }
331}
332
333impl Borrow<PlPath> for PlRefPath {
334 fn borrow(&self) -> &PlPath {
335 self
336 }
337}
338
339impl From<&str> for PlRefPath {
340 fn from(value: &str) -> Self {
341 Self::new(value)
342 }
343}
344
/// Generates the `CloudScheme` enum from `Variant = "literal"` pairs, together
/// with the lookups between a variant and its scheme string.
macro_rules! impl_cloud_scheme {
    ($($t:ident = $n:literal,)+) => {
        /// URI scheme recognized at the start of a path.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
        #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
        pub enum CloudScheme {
            $($t,)+
        }

        impl CloudScheme {
            /// Returns the scheme string of this variant (without `://`).
            pub const fn as_str(&self) -> &'static str {
                match *self {
                    $(Self::$t => $n,)+
                }
            }

            /// Maps a bare scheme string to its variant.
            ///
            /// Several variants may share one literal; the first one listed
            /// wins, which is why unreachable arms are expected here.
            #[expect(unreachable_patterns)]
            fn from_scheme_str(s: &str) -> Option<Self> {
                match s {
                    $($n => Some(Self::$t),)+
                    _ => None,
                }
            }
        }
    };
}
373
// Variant order matters: it defines the derived `Ord`, and for duplicate
// literals the first variant listed is the one `from_scheme_str` returns.
impl_cloud_scheme! {
    Abfs = "abfs",
    Abfss = "abfss",
    Adl = "adl",
    Az = "az",
    Azure = "azure",
    // `File` and `FileNoHostname` share the literal "file"; `from_scheme_str`
    // resolves it to `File`, and `CloudScheme::from_path` picks between the two
    // based on whether `//` follows `file:`.
    File = "file",
    FileNoHostname = "file",
    Gcs = "gcs",
    Gs = "gs",
    Hf = "hf",
    Http = "http",
    Https = "https",
    S3 = "s3",
    S3a = "s3a",
}
390
391impl CloudScheme {
392 pub fn from_path(path: &str) -> Option<Self> {
393 if let Some(stripped) = path.strip_prefix("file:") {
394 return Some(if stripped.starts_with("//") {
395 Self::File
396 } else {
397 Self::FileNoHostname
398 });
399 }
400
401 Self::from_scheme_str(&path[..path.find("://")?])
402 }
403
404 pub fn strip_scheme_index(&self) -> usize {
407 if let Self::FileNoHostname = self {
408 5
409 } else {
410 self.as_str().len() + 3
411 }
412 }
413}
414
415impl Display for CloudScheme {
416 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
417 Display::fmt(self.as_str(), f)
418 }
419}
420
421pub fn format_file_uri(absolute_local_path: &str) -> PlRefPath {
426 if cfg!(target_family = "windows") || absolute_local_path.starts_with(WINDOWS_EXTPATH_PREFIX) {
436 if let Some(path) = PlPath::normalize_windows_path(absolute_local_path) {
437 PlRefPath::new(format_pl_refstr!("file:///{path}"))
438 } else {
439 PlRefPath::new(format_pl_refstr!("file:///{absolute_local_path}"))
440 }
441 } else {
442 PlRefPath::new(format_pl_refstr!("file://{absolute_local_path}"))
443 }
444}
445
#[cfg(test)]
mod tests {
    use super::*;

    /// Scheme detection, scheme stripping and authority splitting for `file:`
    /// URIs, plus stripping of the Windows extended-path prefix.
    #[test]
    fn test_plpath_file() {
        // `file://` form: scheme is `File`, and everything after `file://` is kept.
        let p = PlRefPath::new("file:///home/user");
        assert_eq!(
            (
                p.scheme(),
                p.scheme().map(|x| x.as_str()),
                p.as_str(),
                p.strip_scheme(),
            ),
            (
                Some(CloudScheme::File),
                Some("file"),
                "file:///home/user",
                "/home/user"
            )
        );

        // `file:` form without `//`: scheme is `FileNoHostname`, only `file:` is stripped.
        let p = PlRefPath::new("file:/home/user");
        assert_eq!(
            (
                p.scheme(),
                p.scheme().map(|x| x.as_str()),
                p.as_str(),
                p.strip_scheme(),
            ),
            (
                Some(CloudScheme::FileNoHostname),
                Some("file"),
                "file:/home/user",
                "/home/user"
            )
        );

        assert_eq!(PlRefPath::new("file://").scheme(), Some(CloudScheme::File));

        // Nothing follows the (empty) authority, so there is no split.
        assert_eq!(
            PlRefPath::new("file://").strip_scheme_split_authority(),
            None
        );

        assert_eq!(
            PlRefPath::new("file:///").strip_scheme_split_authority(),
            Some(("", "/"))
        );

        assert_eq!(
            PlRefPath::new("file:///path").strip_scheme_split_authority(),
            Some(("", "/path"))
        );

        assert_eq!(
            PlRefPath::new("file://hostname:80/path").strip_scheme_split_authority(),
            Some(("hostname:80", "/path"))
        );

        // `FileNoHostname` always splits, with an empty authority.
        assert_eq!(
            PlRefPath::new("file:").scheme(),
            Some(CloudScheme::FileNoHostname)
        );
        assert_eq!(
            PlRefPath::new("file:/").scheme(),
            Some(CloudScheme::FileNoHostname)
        );
        assert_eq!(
            PlRefPath::new("file:").strip_scheme_split_authority(),
            Some(("", ""))
        );
        assert_eq!(
            PlRefPath::new("file:/Local/path").strip_scheme_split_authority(),
            Some(("", "/Local/path"))
        );

        // The extended-path prefix is stripped and backslashes become '/'.
        assert_eq!(
            PlRefPath::new(r#"\\?\C:\Windows\system32"#).as_str(),
            "C:/Windows/system32"
        );
    }

    /// `join` for plain paths and `file://` URIs, using native separators.
    #[test]
    fn test_plpath_join() {
        // A joined path that has its own scheme replaces the base entirely.
        assert_eq!(
            PlRefPath::new("s3://.../...").join("az://.../...").as_str(),
            "az://.../..."
        );

        // Checks `base.join(added) == expect` for the plain path and for the
        // `file://` URI form of `base`.
        fn _assert_plpath_join(base: &str, added: &str, expect: &str) {
            let expect = PlRefPath::new(expect);
            // Rewrite the inputs with the platform's main separator.
            let base = base.replace('/', std::path::MAIN_SEPARATOR_STR);
            let added = added.replace('/', std::path::MAIN_SEPARATOR_STR);

            assert_eq!(PlRefPath::new(&base).join(&added), expect);

            let uri_base = format_file_uri(&base);
            // When `added` starts with a separator the expected result is the
            // plain `expect` path; otherwise it is the file URI of `expect`.
            let expect_uri = if added.starts_with(std::path::MAIN_SEPARATOR_STR) {
                expect.clone()
            } else {
                format_file_uri(expect.as_str())
            };

            assert_eq!(PlRefPath::new(uri_base.as_str()).join(added), expect_uri);
        }

        // Sugar so each case reads as `base + added => expected`.
        macro_rules! assert_plpath_join {
            ($base:literal + $added:literal => $expect:literal) => {
                _assert_plpath_join($base, $added, $expect)
            };
        }

        assert_plpath_join!("a/b/c/" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e/" => "a/b/c/d/e/");
        assert_plpath_join!("a/b/c" + "/d" => "/d");
        assert_plpath_join!("a/b/c" + "/d/" => "/d/");
        assert_plpath_join!("" + "/d/" => "/d/");
        assert_plpath_join!("/" + "/d/" => "/d/");
        assert_plpath_join!("/x/y" + "/d/" => "/d/");
        assert_plpath_join!("/x/y" + "/d" => "/d");
        assert_plpath_join!("/x/y" + "d" => "/x/y/d");

        assert_plpath_join!("/a/longer" + "path" => "/a/longer/path");
        assert_plpath_join!("/a/longer" + "/path" => "/path");
        assert_plpath_join!("/a/longer" + "path/test" => "/a/longer/path/test");
        assert_plpath_join!("/a/longer" + "/path/test" => "/path/test");
    }

    /// `file_name` works on the scheme-stripped path.
    #[test]
    fn test_plpath_name() {
        assert_eq!(PlRefPath::new("s3://...").file_name(), Some("...".as_ref()));
        assert_eq!(
            PlRefPath::new("a/b/file.parquet").file_name(),
            Some("file.parquet".as_ref())
        );
        assert_eq!(
            PlRefPath::new("file.parquet").file_name(),
            Some("file.parquet".as_ref())
        );

        // Nothing is left after the scheme (or the path is empty): no file name.
        assert_eq!(PlRefPath::new("s3://").file_name(), None);
        assert_eq!(PlRefPath::new("").file_name(), None);
    }
}