1use core::fmt;
2use std::ffi::OsStr;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
8#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
9#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
10pub enum PlPath {
11 Local(Arc<Path>),
12 Cloud(PlCloudPath),
13}
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
17pub enum PlPathRef<'a> {
18 Local(&'a Path),
19 Cloud(PlCloudPathRef<'a>),
20}
21
22#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
23#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
25pub struct PlCloudPath {
26 scheme: CloudScheme,
28 uri: Arc<str>,
30}
31
32#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
33pub struct PlCloudPathRef<'a> {
34 scheme: CloudScheme,
36 uri: &'a str,
38}
39
40impl<'a> fmt::Display for PlCloudPathRef<'a> {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 f.write_str(self.uri())
43 }
44}
45
46impl fmt::Display for PlCloudPath {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 self.as_ref().fmt(f)
49 }
50}
51
52impl PlCloudPath {
53 pub fn as_ref(&self) -> PlCloudPathRef<'_> {
54 PlCloudPathRef {
55 scheme: self.scheme,
56 uri: self.uri.as_ref(),
57 }
58 }
59
60 pub fn strip_scheme(&self) -> &str {
61 self.scheme.strip_scheme_from_uri(&self.uri)
62 }
63}
64
65impl PlCloudPathRef<'_> {
66 pub fn new<'a>(uri: &'a str) -> Option<PlCloudPathRef<'a>> {
67 CloudScheme::from_uri(uri).map(|scheme| PlCloudPathRef { scheme, uri })
68 }
69
70 pub fn into_owned(self) -> PlCloudPath {
71 PlCloudPath {
72 scheme: self.scheme,
73 uri: self.uri.into(),
74 }
75 }
76
77 pub fn scheme(&self) -> CloudScheme {
78 self.scheme
79 }
80
81 pub fn uri(&self) -> &str {
82 self.uri
83 }
84
85 pub fn strip_scheme(&self) -> &str {
86 self.scheme.strip_scheme_from_uri(self.uri)
87 }
88}
89
90pub struct PlPathDisplay<'a> {
91 path: PlPathRef<'a>,
92}
93
94impl<'a> fmt::Display for PlPathDisplay<'a> {
95 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96 match self.path {
97 PlPathRef::Local(p) => p.display().fmt(f),
98 PlPathRef::Cloud(p) => p.fmt(f),
99 }
100 }
101}
102
/// Generates the `CloudScheme` enum together with its string conversions.
///
/// The input is a comma-terminated list of `Variant = "scheme"` pairs. Several variants may
/// share one scheme string; `from_scheme` then resolves that string to the first variant
/// listed, while `as_str` maps every variant back to its own string.
macro_rules! impl_cloud_scheme {
    ($($variant:ident = $name:literal,)+) => {
        /// URI schemes that are recognized as cloud paths.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
        #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
        pub enum CloudScheme {
            $($variant,)+
        }

        impl CloudScheme {
            /// Looks up a variant by its bare scheme string (no `://`).
            ///
            /// Variants that share a scheme string produce duplicate match arms, which is why
            /// an unreachable-pattern diagnostic is expected here.
            #[expect(unreachable_patterns)]
            fn from_scheme(s: &str) -> Option<Self> {
                match s {
                    $($name => Some(Self::$variant),)+
                    _ => None,
                }
            }

            /// Returns the scheme string of this variant (no `://`).
            pub const fn as_str(&self) -> &'static str {
                match self {
                    $(Self::$variant => $name,)+
                }
            }
        }
    };
}

// Declaration order matters: it fixes the derived ordering of the variants, and for the
// duplicated "file" string it makes `from_scheme("file")` resolve to `File`.
impl_cloud_scheme! {
    Abfs = "abfs",
    Abfss = "abfss",
    Adl = "adl",
    Az = "az",
    Azure = "azure",
    File = "file",
    FileNoHostname = "file",
    Gcs = "gcs",
    Gs = "gs",
    Hf = "hf",
    Http = "http",
    Https = "https",
    S3 = "s3",
    S3a = "s3a",
}
148
149impl CloudScheme {
150 pub fn from_uri(path: &str) -> Option<Self> {
151 if path.starts_with("file:/") {
152 return Some(if path.as_bytes().get(6) != Some(&b'/') {
153 Self::FileNoHostname
154 } else {
155 Self::File
156 });
157 }
158
159 Self::from_scheme(&path[..path.find("://")?])
160 }
161
162 pub fn strip_scheme_from_uri<'a>(&self, uri: &'a str) -> &'a str {
163 &uri[self.strip_scheme_index()..]
164 }
165
166 pub fn strip_scheme_index(&self) -> usize {
169 if let Self::FileNoHostname = self {
170 5
171 } else {
172 self.as_str().len() + 3
173 }
174 }
175}
176
177impl fmt::Display for CloudScheme {
178 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
179 f.write_str(self.as_str())
180 }
181}
182
183impl<'a> PlPathRef<'a> {
184 pub fn scheme(&self) -> Option<CloudScheme> {
185 match self {
186 Self::Local(_) => None,
187 Self::Cloud(p) => Some(p.scheme),
188 }
189 }
190
191 pub fn is_local(&self) -> bool {
192 matches!(self, Self::Local(_))
193 }
194
195 pub fn is_cloud_url(&self) -> bool {
196 matches!(self, Self::Cloud(_))
197 }
198
199 pub fn as_local_path(&self) -> Option<&Path> {
200 match self {
201 Self::Local(p) => Some(p),
202 Self::Cloud(_) => None,
203 }
204 }
205
206 pub fn as_cloud_path(&'a self) -> Option<PlCloudPathRef<'a>> {
207 match self {
208 Self::Local(_) => None,
209 Self::Cloud(p) => Some(*p),
210 }
211 }
212
213 pub fn join(&self, other: impl AsRef<str>) -> PlPath {
214 let other = other.as_ref();
215 if other.is_empty() {
216 return self.into_owned();
217 }
218
219 match self {
220 Self::Local(p) => PlPath::Local(p.join(other).into()),
221 Self::Cloud(p) => {
222 if let Some(cloud_path) = PlCloudPathRef::new(other) {
223 return PlPath::Cloud(cloud_path.into_owned());
224 }
225
226 let needs_slash = !p.uri.ends_with('/') && !other.starts_with('/');
227
228 let mut out =
229 String::with_capacity(p.uri.len() + usize::from(needs_slash) + other.len());
230
231 out.push_str(p.uri);
232 if needs_slash {
233 out.push('/');
234 }
235 out.push_str(other);
240
241 let uri = out.into();
242 PlPath::Cloud(PlCloudPath {
243 scheme: p.scheme,
244 uri,
245 })
246 },
247 }
248 }
249
250 pub fn display(&self) -> PlPathDisplay<'_> {
251 PlPathDisplay { path: *self }
252 }
253
254 pub fn from_local_path(path: &'a Path) -> Self {
255 Self::Local(path)
256 }
257
258 pub fn new(uri: &'a str) -> Self {
259 if let Some(scheme) = CloudScheme::from_uri(uri) {
260 Self::Cloud(PlCloudPathRef { scheme, uri })
261 } else {
262 Self::from_local_path(Path::new(uri))
263 }
264 }
265
266 pub fn into_owned(self) -> PlPath {
267 match self {
268 Self::Local(p) => PlPath::Local(p.into()),
269 Self::Cloud(p) => PlPath::Cloud(p.into_owned()),
270 }
271 }
272
273 pub fn strip_scheme(&self) -> &str {
274 match self {
275 Self::Local(p) => p.to_str().unwrap(),
276 Self::Cloud(p) => p.strip_scheme(),
277 }
278 }
279
280 pub fn parent(&self) -> Option<Self> {
281 Some(match self {
282 Self::Local(p) => Self::Local(p.parent()?),
283 Self::Cloud(p) => {
284 let uri = p.uri;
285 let offset_start = p.scheme.strip_scheme_index();
286 let last_slash = uri[offset_start..]
287 .char_indices()
288 .rev()
289 .find(|(_, c)| *c == '/')?
290 .0;
291 let uri = &uri[..offset_start + last_slash];
292
293 Self::Cloud(PlCloudPathRef {
294 scheme: p.scheme,
295 uri,
296 })
297 },
298 })
299 }
300
301 pub fn file_name(&self) -> Option<&OsStr> {
302 match self {
303 Self::Local(p) => {
304 if p.is_dir() {
305 None
306 } else {
307 p.file_name()
308 }
309 },
310 Self::Cloud(p) => {
311 if p.scheme() == CloudScheme::File
312 && std::fs::metadata(p.strip_scheme()).is_ok_and(|x| x.is_dir())
313 {
314 return None;
315 }
316
317 let p = p.strip_scheme();
318 let out = p.rfind('/').map_or(p, |i| &p[i + 1..]);
319 (!out.is_empty()).then_some(out.as_ref())
320 },
321 }
322 }
323
324 pub fn extension(&self) -> Option<&str> {
325 match self {
326 Self::Local(path) => path.extension().and_then(|e| e.to_str()),
327 Self::Cloud(_) => {
328 let after_scheme = self.strip_scheme();
329
330 after_scheme.rfind(['.', '/']).and_then(|i| {
331 after_scheme[i..]
332 .starts_with('.')
333 .then_some(&after_scheme[i + 1..])
334 })
335 },
336 }
337 }
338
339 pub fn to_str(&self) -> &'a str {
340 match self {
341 Self::Local(p) => p.to_str().unwrap(),
342 Self::Cloud(p) => p.uri,
343 }
344 }
345
346 pub fn offset_bytes(&'a self, n: usize) -> PathBuf {
349 let s = self.to_str();
350 if let Some(scheme) = self.scheme()
351 && n > 0
352 {
353 debug_assert!(n >= scheme.as_str().len())
354 }
355 PathBuf::from(&s[n..])
356 }
357
358 pub fn strip_scheme_split_authority(&self) -> Option<(&'_ str, &'_ str)> {
372 match self.scheme() {
373 None | Some(CloudScheme::File | CloudScheme::FileNoHostname) => {
374 Some(("", self.strip_scheme()))
375 },
376 Some(scheme) => {
377 let path_str = self.to_str();
378 let position = self.authority_end_position();
379
380 if position < path_str.len() {
381 assert!(path_str[position..].starts_with('/'));
382 }
383
384 (position < path_str.len()).then_some((
385 &path_str[scheme.strip_scheme_index()..position],
386 &path_str[position..],
387 ))
388 },
389 }
390 }
391
392 pub fn authority_end_position(&self) -> usize {
395 match self.scheme() {
396 None | Some(CloudScheme::File | CloudScheme::FileNoHostname) => 0,
397 Some(_) => {
398 let after_scheme = self.strip_scheme();
399 let offset = self.to_str().len() - after_scheme.len();
400
401 offset + after_scheme.find('/').unwrap_or(after_scheme.len())
402 },
403 }
404 }
405
406 pub fn to_absolute_path(&self) -> Option<PlPath> {
409 if let Self::Local(p) = self
410 && !p.is_absolute()
411 && !p.to_str().unwrap().is_empty()
412 {
413 Some(PlPath::new(
414 std::path::absolute(p).unwrap().to_str().unwrap(),
415 ))
416 } else {
417 None
418 }
419 }
420}
421
422impl PlPath {
423 pub fn new(uri: &str) -> Self {
424 PlPathRef::new(uri).into_owned()
425 }
426
427 pub fn display(&self) -> PlPathDisplay<'_> {
428 PlPathDisplay {
429 path: match self {
430 Self::Local(p) => PlPathRef::Local(p.as_ref()),
431 Self::Cloud(p) => PlPathRef::Cloud(p.as_ref()),
432 },
433 }
434 }
435
436 pub fn is_local(&self) -> bool {
437 self.as_ref().is_local()
438 }
439
440 pub fn is_cloud_url(&self) -> bool {
441 self.as_ref().is_cloud_url()
442 }
443
444 #[expect(clippy::should_implement_trait)]
446 pub fn from_str(uri: &str) -> Self {
447 Self::new(uri)
448 }
449
450 pub fn from_string(uri: String) -> Self {
451 Self::new(&uri)
452 }
453
454 pub fn as_ref(&self) -> PlPathRef<'_> {
455 match self {
456 Self::Local(p) => PlPathRef::Local(p.as_ref()),
457 Self::Cloud(p) => PlPathRef::Cloud(p.as_ref()),
458 }
459 }
460
461 pub fn cloud_scheme(&self) -> Option<CloudScheme> {
462 match self {
463 Self::Local(_) => None,
464 Self::Cloud(p) => Some(p.scheme),
465 }
466 }
467
468 pub fn to_str(&self) -> &str {
469 match self {
470 Self::Local(p) => p.to_str().unwrap(),
471 Self::Cloud(p) => p.uri.as_ref(),
472 }
473 }
474
475 pub fn into_local_path(self) -> Option<Arc<Path>> {
476 match self {
477 PlPath::Local(path) => Some(path),
478 PlPath::Cloud(_) => None,
479 }
480 }
481}
482
#[cfg(test)]
mod tests {
    use super::*;

    /// Both `file:` URI forms are cloud paths and strip down to the same local path.
    #[test]
    fn test_plpath_file() {
        let with_host = PlPath::new("file:///home/user");
        assert_eq!(with_host.cloud_scheme(), Some(CloudScheme::File));
        assert_eq!(with_host.cloud_scheme().map(|x| x.as_str()), Some("file"));
        assert_eq!(with_host.to_str(), "file:///home/user");
        assert_eq!(with_host.as_ref().strip_scheme(), "/home/user");

        let no_host = PlPath::new("file:/home/user");
        assert_eq!(no_host.cloud_scheme(), Some(CloudScheme::FileNoHostname));
        assert_eq!(no_host.cloud_scheme().map(|x| x.as_str()), Some("file"));
        assert_eq!(no_host.to_str(), "file:/home/user");
        assert_eq!(no_host.as_ref().strip_scheme(), "/home/user");
    }

    /// Joining is checked for local paths and, where given, for the `file://` URI form.
    #[test]
    fn plpath_join() {
        /// Rewrites `/` into the platform's main path separator.
        fn native_separators(s: &str) -> String {
            s.chars()
                .map(|c| if c == '/' { std::path::MAIN_SEPARATOR } else { c })
                .collect()
        }

        /// Asserts `base + added == expect` for local paths and, when `expect_uri` is given,
        /// `file://base + added == file://expect_uri` for the URI form.
        fn check_join(base: &str, added: &str, expect: &str, expect_uri: Option<&str>) {
            let local_base = native_separators(base);
            let local_added = native_separators(added);
            let local_expect = native_separators(expect);

            let joined = PlPath::new(&local_base).as_ref().join(local_added);
            assert_eq!(joined.to_str(), local_expect);

            let Some(expect_uri) = expect_uri else {
                return;
            };

            let uri_base = format!("file://{base}");
            let uri_expect = format!("file://{expect_uri}");

            let joined = PlPath::new(uri_base.as_str()).as_ref().join(added);
            assert_eq!(joined.to_str(), uri_expect.as_str());
        }

        macro_rules! assert_plpath_join {
            ($base:literal + $added:literal => $expect:literal) => {
                check_join($base, $added, $expect, None)
            };
            ($base:literal + $added:literal => $expect:literal, $uri_result:literal) => {
                check_join($base, $added, $expect, Some($uri_result))
            };
        }

        assert_plpath_join!("a/b/c/" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e/" => "a/b/c/d/e/");
        assert_plpath_join!("a/b/c" + "" => "a/b/c");
        assert_plpath_join!("a/b/c" + "/d" => "/d", "a/b/c/d");
        assert_plpath_join!("a/b/c" + "/d/" => "/d/", "a/b/c/d/");
        assert_plpath_join!("" + "/d/" => "/d/");
        assert_plpath_join!("/" + "/d/" => "/d/", "//d/");
        assert_plpath_join!("/x/y" + "/d/" => "/d/", "/x/y/d/");
        assert_plpath_join!("/x/y" + "/d" => "/d", "/x/y/d");
        assert_plpath_join!("/x/y" + "d" => "/x/y/d");

        assert_plpath_join!("/a/longer" + "path" => "/a/longer/path");
        assert_plpath_join!("/a/longer" + "/path" => "/path", "/a/longer/path");
        assert_plpath_join!("/a/longer" + "path/wow" => "/a/longer/path/wow");
        assert_plpath_join!("/a/longer" + "/path/wow" => "/path/wow", "/a/longer/path/wow");
        assert_plpath_join!("/an/even/longer" + "path" => "/an/even/longer/path");
        assert_plpath_join!("/an/even/longer" + "/path" => "/path", "/an/even/longer/path");
        assert_plpath_join!("/an/even/longer" + "path/wow" => "/an/even/longer/path/wow");
        assert_plpath_join!("/an/even/longer" + "/path/wow" => "/path/wow", "/an/even/longer/path/wow");
    }

    /// `file_name` yields the last component, or `None` when there is none.
    #[test]
    fn test_plpath_name() {
        let name_of_dots = PlPathRef::new("s3://...");
        assert_eq!(name_of_dots.file_name(), Some("...".as_ref()));

        let nested = PlPathRef::new("a/b/file.parquet");
        assert_eq!(nested.file_name(), Some("file.parquet".as_ref()));

        let bare = PlPathRef::new("file.parquet");
        assert_eq!(bare.file_name(), Some("file.parquet".as_ref()));

        assert_eq!(PlPathRef::new("s3://").file_name(), None);
        assert_eq!(PlPathRef::new("").file_name(), None);
    }
}