1use core::fmt;
2use std::path::{Path, PathBuf};
3use std::str::FromStr;
4use std::sync::Arc;
5
// An owned path: either a path on the local filesystem or a URI whose scheme is one of the
// `CloudScheme` values.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum PlPath {
    // A local filesystem path held behind a reference-counted pointer.
    Local(Arc<Path>),
    // A URI that starts with a recognized `scheme://` prefix.
    Cloud(PlCloudPath),
}
14
/// Borrowed, `Copy` counterpart of [`PlPath`]: a view over a local path or a cloud URI.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PlPathRef<'a> {
    /// A borrowed local filesystem path.
    Local(&'a Path),
    /// A borrowed URI that starts with a recognized `scheme://` prefix.
    Cloud(PlCloudPathRef<'a>),
}
21
// An owned URI together with its already-parsed scheme.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct PlCloudPath {
    // The scheme parsed from the start of `uri`.
    scheme: CloudScheme,
    // The full URI, including the leading `scheme://` prefix.
    uri: Arc<str>,
}
31
/// Borrowed, `Copy` counterpart of [`PlCloudPath`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PlCloudPathRef<'a> {
    /// The scheme parsed from the start of `uri`.
    scheme: CloudScheme,
    /// The full URI, including the leading `scheme://` prefix.
    uri: &'a str,
}
39
40impl<'a> fmt::Display for PlCloudPathRef<'a> {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 f.write_str(self.uri())
43 }
44}
45
46impl fmt::Display for PlCloudPath {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 self.as_ref().fmt(f)
49 }
50}
51
52impl PlCloudPath {
53 pub fn as_ref(&self) -> PlCloudPathRef<'_> {
54 PlCloudPathRef {
55 scheme: self.scheme,
56 uri: self.uri.as_ref(),
57 }
58 }
59
60 pub fn strip_scheme(&self) -> &str {
61 &self.uri[self.scheme.as_str().len() + 3..]
62 }
63}
64
65impl PlCloudPathRef<'_> {
66 pub fn new<'a>(uri: &'a str) -> Option<PlCloudPathRef<'a>> {
67 if let Some(i) = uri.find([':', '/']) {
68 if uri[i..].starts_with("://") && CLOUD_SCHEME_REGEX.is_match(&uri[..i]) {
69 let scheme = CloudScheme::from_str(&uri[..i]).unwrap();
70 return Some(PlCloudPathRef { scheme, uri });
71 }
72 }
73
74 None
75 }
76
77 pub fn into_owned(self) -> PlCloudPath {
78 PlCloudPath {
79 scheme: self.scheme,
80 uri: self.uri.into(),
81 }
82 }
83
84 pub fn scheme(&self) -> CloudScheme {
85 self.scheme
86 }
87
88 pub fn uri(&self) -> &str {
89 self.uri
90 }
91
92 pub fn strip_scheme(&self) -> &str {
93 &self.uri[self.scheme.as_str().len() + "://".len()..]
94 }
95}
96
/// Helper returned by the `display()` methods; its `Display` impl prints the wrapped path.
pub struct AddressDisplay<'a> {
    /// The path (local or cloud) to print.
    addr: PlPathRef<'a>,
}
100
101impl<'a> fmt::Display for AddressDisplay<'a> {
102 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103 match self.addr {
104 PlPathRef::Local(p) => p.display().fmt(f),
105 PlPathRef::Cloud(p) => p.fmt(f),
106 }
107 }
108}
109
// Generates the `CloudScheme` enum from a list of `Variant = "text"` pairs, plus the two
// conversions between a variant and its text: `CloudScheme::as_str` and `FromStr`.
macro_rules! impl_scheme {
    ($($variant:ident = $text:literal,)+) => {
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
        #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
        pub enum CloudScheme {
            $($variant,)+
        }

        impl CloudScheme {
            // Returns the scheme text exactly as listed in the macro invocation.
            pub fn as_str(&self) -> &'static str {
                match *self {
                    $(Self::$variant => $text,)+
                }
            }
        }

        impl FromStr for CloudScheme {
            type Err = ();

            // Exact, case-sensitive match against the listed scheme texts.
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                match s {
                    $($text => Ok(Self::$variant),)+
                    _ => Err(()),
                }
            }
        }
    };
}
139
// Every URI scheme that is treated as a cloud path, as `Variant = "scheme text"` pairs.
// `CLOUD_SCHEME_REGEX` has to accept exactly these strings: the path parsers call
// `CloudScheme::from_str(..).unwrap()` after a successful regex match.
impl_scheme! {
    S3 = "s3",
    S3a = "s3a",
    Gs = "gs",
    Gcs = "gcs",
    File = "file",
    Abfs = "abfs",
    Abfss = "abfss",
    Azure = "azure",
    Az = "az",
    Adl = "adl",
    Http = "http",
    Https = "https",
    Hf = "hf",
}
155
156impl fmt::Display for CloudScheme {
157 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
158 f.write_str(self.as_str())
159 }
160}
161
// Anchored pattern that matches a complete scheme name (the text before `://`).
// It must accept exactly the names listed in the `impl_scheme!` invocation, because the path
// parsers `unwrap` the result of `CloudScheme::from_str` after this regex matches.
crate::regex_cache::cached_regex! {
    static CLOUD_SCHEME_REGEX = r"^(s3a?|gs|gcs|file|abfss?|azure|az|adl|https?|hf)$";
}
165
166impl<'a> PlPathRef<'a> {
167 pub fn scheme(&self) -> Option<CloudScheme> {
168 match self {
169 Self::Local(_) => None,
170 Self::Cloud(p) => Some(p.scheme),
171 }
172 }
173
174 pub fn is_local(&self) -> bool {
175 matches!(self, Self::Local(_))
176 }
177
178 pub fn is_cloud_url(&self) -> bool {
179 matches!(self, Self::Cloud(_))
180 }
181
182 pub fn as_local_path(&self) -> Option<&Path> {
183 match self {
184 Self::Local(p) => Some(p),
185 Self::Cloud(_) => None,
186 }
187 }
188
189 pub fn as_cloud_addr(&self) -> Option<PlCloudPathRef<'_>> {
190 match self {
191 Self::Local(_) => None,
192 Self::Cloud(p) => Some(*p),
193 }
194 }
195
196 pub fn join(&self, other: impl AsRef<str>) -> PlPath {
197 let other = other.as_ref();
198 if other.is_empty() {
199 return self.into_owned();
200 }
201
202 match self {
203 Self::Local(p) => PlPath::Local(p.join(other).into()),
204 Self::Cloud(p) => {
205 if let Some(cloud_path) = PlCloudPathRef::new(other) {
206 return PlPath::Cloud(cloud_path.into_owned());
207 }
208
209 let needs_slash = !p.uri.ends_with('/') && !other.starts_with('/');
210
211 let mut out =
212 String::with_capacity(p.uri.len() + usize::from(needs_slash) + other.len());
213
214 out.push_str(p.uri);
215 if needs_slash {
216 out.push('/');
217 }
218 out.push_str(other);
223
224 let uri = out.into();
225 PlPath::Cloud(PlCloudPath {
226 scheme: p.scheme,
227 uri,
228 })
229 },
230 }
231 }
232
233 pub fn display(&self) -> AddressDisplay<'_> {
234 AddressDisplay { addr: *self }
235 }
236
237 pub fn from_local_path(path: &'a Path) -> Self {
238 Self::Local(path)
239 }
240
241 pub fn new(uri: &'a str) -> Self {
242 if let Some(i) = uri.find([':', '/']) {
243 if uri[i..].starts_with("://") && CLOUD_SCHEME_REGEX.is_match(&uri[..i]) {
244 let scheme = CloudScheme::from_str(&uri[..i]).unwrap();
245 return Self::Cloud(PlCloudPathRef { scheme, uri });
246 }
247 }
248
249 Self::from_local_path(Path::new(uri))
250 }
251
252 pub fn into_owned(self) -> PlPath {
253 match self {
254 Self::Local(p) => PlPath::Local(p.into()),
255 Self::Cloud(p) => PlPath::Cloud(p.into_owned()),
256 }
257 }
258
259 pub fn strip_scheme(&self) -> &str {
260 match self {
261 Self::Local(p) => p.to_str().unwrap(),
262 Self::Cloud(p) => p.strip_scheme(),
263 }
264 }
265
266 pub fn parent(&self) -> Option<Self> {
267 Some(match self {
268 Self::Local(p) => Self::Local(p.parent()?),
269 Self::Cloud(p) => {
270 let uri = p.uri;
271 let offset_start = p.scheme.as_str().len() + 3;
272 let last_slash = uri[offset_start..]
273 .char_indices()
274 .rev()
275 .find(|(_, c)| *c == '/')?
276 .0;
277 let uri = &uri[..offset_start + last_slash];
278
279 Self::Cloud(PlCloudPathRef {
280 scheme: p.scheme,
281 uri,
282 })
283 },
284 })
285 }
286
287 pub fn extension(&self) -> Option<&str> {
288 match self {
289 Self::Local(path) => path.extension().and_then(|e| e.to_str()),
290 Self::Cloud(_) => {
291 let offset_path = self.strip_scheme();
292 let separator = '/';
293
294 let mut ext_start = None;
295 for (i, c) in offset_path.char_indices() {
296 if c == separator {
297 ext_start = None;
298 }
299
300 if c == '.' && ext_start.is_none() {
301 ext_start = Some(i);
302 }
303 }
304
305 ext_start.map(|i| &offset_path[i + 1..])
306 },
307 }
308 }
309
310 pub fn to_str(&self) -> &'a str {
311 match self {
312 Self::Local(p) => p.to_str().unwrap(),
313 Self::Cloud(p) => p.uri,
314 }
315 }
316
317 pub fn offset_bytes(&'a self, n: usize) -> PathBuf {
320 let s = self.to_str();
321 if let Some(scheme) = self.scheme()
322 && n > 0
323 {
324 debug_assert!(n >= scheme.as_str().len())
325 }
326 PathBuf::from(&s[n..])
327 }
328}
329
330impl PlPath {
331 pub fn new(uri: &str) -> Self {
332 PlPathRef::new(uri).into_owned()
333 }
334
335 pub fn display(&self) -> AddressDisplay<'_> {
336 AddressDisplay {
337 addr: match self {
338 Self::Local(p) => PlPathRef::Local(p.as_ref()),
339 Self::Cloud(p) => PlPathRef::Cloud(p.as_ref()),
340 },
341 }
342 }
343
344 pub fn is_local(&self) -> bool {
345 self.as_ref().is_local()
346 }
347
348 pub fn is_cloud_url(&self) -> bool {
349 self.as_ref().is_cloud_url()
350 }
351
352 #[expect(clippy::should_implement_trait)]
354 pub fn from_str(uri: &str) -> Self {
355 Self::new(uri)
356 }
357
358 pub fn from_string(uri: String) -> Self {
359 Self::new(&uri)
360 }
361
362 pub fn as_ref(&self) -> PlPathRef<'_> {
363 match self {
364 Self::Local(p) => PlPathRef::Local(p.as_ref()),
365 Self::Cloud(p) => PlPathRef::Cloud(p.as_ref()),
366 }
367 }
368
369 pub fn cloud_scheme(&self) -> Option<CloudScheme> {
370 match self {
371 Self::Local(_) => None,
372 Self::Cloud(p) => Some(p.scheme),
373 }
374 }
375
376 pub fn to_str(&self) -> &str {
377 match self {
378 Self::Local(p) => p.to_str().unwrap(),
379 Self::Cloud(p) => p.uri.as_ref(),
380 }
381 }
382
383 pub fn into_local_path(self) -> Option<Arc<Path>> {
384 match self {
385 PlPath::Local(path) => Some(path),
386 PlPath::Cloud(_) => None,
387 }
388 }
389}
390
#[cfg(test)]
mod tests {
    use super::*;

    /// Rewrites every `/` to the platform's main separator so the local-path expectations
    /// hold on any OS.
    fn to_native(path: &str) -> String {
        path.chars()
            .map(|c| if c == '/' { std::path::MAIN_SEPARATOR } else { c })
            .collect()
    }

    #[test]
    fn plpath_join() {
        // Checks one join twice: once as a local path and once as a `file://` URI.
        // `$result` is the expected local result; the optional `$uri_result` overrides the
        // expectation for the URI case, where a leading `/` in `$added` does not reset the base.
        macro_rules! assert_plpath_join {
            ($base:literal + $added:literal => $result:literal$(, $uri_result:literal)?) => {
                // Local-path flavour, using native separators.
                let local_base = to_native($base);
                let local_added = to_native($added);
                let local_expected = to_native($result);
                assert_eq!(
                    PlPath::new(&local_base).as_ref().join(local_added).to_str(),
                    local_expected
                );

                // URI flavour: separators stay as `/`.
                let uri_base = format!("file://{}", $base);
                #[allow(unused_variables)]
                let expected_tail = {
                    let x = $result;
                    $(let x = $uri_result;)?
                    x
                };
                let uri_expected = format!("file://{expected_tail}");
                assert_eq!(
                    PlPath::new(uri_base.as_str())
                        .as_ref()
                        .join($added)
                        .to_str(),
                    uri_expected.as_str()
                );
            };
        }

        assert_plpath_join!("a/b/c/" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e/" => "a/b/c/d/e/");
        assert_plpath_join!("a/b/c" + "" => "a/b/c");
        assert_plpath_join!("a/b/c" + "/d" => "/d", "a/b/c/d");
        assert_plpath_join!("a/b/c" + "/d/" => "/d/", "a/b/c/d/");
        assert_plpath_join!("" + "/d/" => "/d/");
        assert_plpath_join!("/" + "/d/" => "/d/", "//d/");
        assert_plpath_join!("/x/y" + "/d/" => "/d/", "/x/y/d/");
        assert_plpath_join!("/x/y" + "/d" => "/d", "/x/y/d");
        assert_plpath_join!("/x/y" + "d" => "/x/y/d");

        assert_plpath_join!("/a/longer" + "path" => "/a/longer/path");
        assert_plpath_join!("/a/longer" + "/path" => "/path", "/a/longer/path");
        assert_plpath_join!("/a/longer" + "path/wow" => "/a/longer/path/wow");
        assert_plpath_join!("/a/longer" + "/path/wow" => "/path/wow", "/a/longer/path/wow");
        assert_plpath_join!("/an/even/longer" + "path" => "/an/even/longer/path");
        assert_plpath_join!("/an/even/longer" + "/path" => "/path", "/an/even/longer/path");
        assert_plpath_join!("/an/even/longer" + "path/wow" => "/an/even/longer/path/wow");
        assert_plpath_join!("/an/even/longer" + "/path/wow" => "/path/wow", "/an/even/longer/path/wow");
    }
}