1use core::fmt;
2use std::path::{Path, PathBuf};
3use std::str::FromStr;
4use std::sync::Arc;
5
/// An owned path that is either a local filesystem path or a cloud URI.
///
/// [`PlPath::new`] yields [`PlPath::Cloud`] when the input starts with a
/// recognized `<scheme>://` prefix (see [`CloudScheme`]) and [`PlPath::Local`]
/// otherwise.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum PlPath {
    /// A path on the local filesystem.
    Local(Arc<Path>),
    /// A URI whose scheme is one of the [`CloudScheme`] variants.
    Cloud(PlCloudPath),
}
14
/// A borrowed, `Copy` counterpart of [`PlPath`]: a local path or a cloud URI
/// that borrows its data for `'a`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PlPathRef<'a> {
    /// A borrowed local filesystem path.
    Local(&'a Path),
    /// A borrowed cloud URI together with its scheme.
    Cloud(PlCloudPathRef<'a>),
}
21
/// An owned cloud URI paired with its [`CloudScheme`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct PlCloudPath {
    /// The scheme of `uri`.
    scheme: CloudScheme,
    /// The full URI. The slicing helpers in this file expect it to start with
    /// `<scheme>://`.
    uri: Arc<str>,
}
31
/// A borrowed, `Copy` counterpart of [`PlCloudPath`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PlCloudPathRef<'a> {
    /// The scheme of `uri`.
    scheme: CloudScheme,
    /// The full URI. The slicing helpers in this file expect it to start with
    /// `<scheme>://`.
    uri: &'a str,
}
39
40impl<'a> fmt::Display for PlCloudPathRef<'a> {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 f.write_str(self.uri())
43 }
44}
45
46impl fmt::Display for PlCloudPath {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 self.as_ref().fmt(f)
49 }
50}
51
52impl PlCloudPath {
53 pub fn as_ref(&self) -> PlCloudPathRef<'_> {
54 PlCloudPathRef {
55 scheme: self.scheme,
56 uri: self.uri.as_ref(),
57 }
58 }
59
60 pub fn strip_scheme(&self) -> &str {
61 &self.uri[self.scheme.as_str().len() + 3..]
62 }
63}
64
65impl PlCloudPathRef<'_> {
66 pub fn into_owned(self) -> PlCloudPath {
67 PlCloudPath {
68 scheme: self.scheme,
69 uri: self.uri.into(),
70 }
71 }
72
73 pub fn scheme(&self) -> CloudScheme {
74 self.scheme
75 }
76
77 pub fn uri(&self) -> &str {
78 self.uri
79 }
80
81 pub fn strip_scheme(&self) -> &str {
82 &self.uri[self.scheme.as_str().len() + "://".len()..]
83 }
84}
85
/// Helper returned by `display()` on [`PlPath`] / [`PlPathRef`] whose
/// [`fmt::Display`] impl prints the wrapped path.
pub struct AddressDisplay<'a> {
    /// The path to print.
    addr: PlPathRef<'a>,
}
89
90impl<'a> fmt::Display for AddressDisplay<'a> {
91 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92 match self.addr {
93 PlPathRef::Local(p) => p.display().fmt(f),
94 PlPathRef::Cloud(p) => p.fmt(f),
95 }
96 }
97}
98
/// Generates the `CloudScheme` enum from `Variant = "name",` pairs, together
/// with its `FromStr` impl (name -> variant) and `as_str` (variant -> name).
macro_rules! impl_scheme {
    ($($t:ident = $n:literal,)+) => {
        /// A URI scheme recognized as a cloud location.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
        #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
        #[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
        pub enum CloudScheme {
            $($t,)+
        }

        impl CloudScheme {
            /// Returns the scheme name as it appears before `://`.
            pub fn as_str(&self) -> &'static str {
                match *self {
                    $(CloudScheme::$t => $n,)+
                }
            }
        }

        impl FromStr for CloudScheme {
            type Err = ();

            /// Parses an exact (case-sensitive) scheme name; anything else is `Err(())`.
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                $(
                    if s == $n {
                        return Ok(CloudScheme::$t);
                    }
                )+
                Err(())
            }
        }
    };
}
128
// The supported cloud URI schemes, as `Variant = "scheme-name"` pairs.
//
// The derived `PartialOrd`/`Ord` follow this declaration order, so reordering
// entries changes how `CloudScheme` values compare. The scheme names here and
// the alternation in `CLOUD_SCHEME_REGEX` must be kept in sync.
impl_scheme! {
    S3 = "s3",
    S3a = "s3a",
    Gs = "gs",
    Gcs = "gcs",
    File = "file",
    Abfs = "abfs",
    Abfss = "abfss",
    Azure = "azure",
    Az = "az",
    Adl = "adl",
    Http = "http",
    Https = "https",
    Hf = "hf",
}
144
145impl fmt::Display for CloudScheme {
146 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
147 f.write_str(self.as_str())
148 }
149}
150
// Matches a string that is, in its entirety, one of the supported scheme names
// (the text that precedes `://`). `PlPathRef::new` only parses a `CloudScheme`
// after this regex has matched, so the alternation must stay in sync with the
// names listed in the `impl_scheme!` invocation.
crate::regex_cache::cached_regex! {
    static CLOUD_SCHEME_REGEX = r"^(s3a?|gs|gcs|file|abfss?|azure|az|adl|https?|hf)$";
}
154
155impl<'a> PlPathRef<'a> {
156 pub fn scheme(&self) -> Option<CloudScheme> {
157 match self {
158 Self::Local(_) => None,
159 Self::Cloud(p) => Some(p.scheme),
160 }
161 }
162
163 pub fn is_local(&self) -> bool {
164 matches!(self, Self::Local(_))
165 }
166
167 pub fn is_cloud_url(&self) -> bool {
168 matches!(self, Self::Cloud(_))
169 }
170
171 pub fn as_local_path(&self) -> Option<&Path> {
172 match self {
173 Self::Local(p) => Some(p),
174 Self::Cloud(_) => None,
175 }
176 }
177
178 pub fn as_cloud_addr(&self) -> Option<PlCloudPathRef<'_>> {
179 match self {
180 Self::Local(_) => None,
181 Self::Cloud(p) => Some(*p),
182 }
183 }
184
185 pub fn join(&self, other: impl AsRef<str>) -> PlPath {
186 let other = other.as_ref();
187 if other.is_empty() {
188 return self.into_owned();
189 }
190
191 match self {
192 Self::Local(p) => PlPath::Local(p.join(other).into()),
193 Self::Cloud(p) => {
194 let needs_slash = !p.uri.ends_with('/') && !other.starts_with('/');
195
196 let mut out =
197 String::with_capacity(p.uri.len() + usize::from(needs_slash) + other.len());
198
199 out.push_str(p.uri);
200 if needs_slash {
201 out.push('/');
202 }
203 out.push_str(other);
208
209 let uri = out.into();
210 PlPath::Cloud(PlCloudPath {
211 scheme: p.scheme,
212 uri,
213 })
214 },
215 }
216 }
217
218 pub fn display(&self) -> AddressDisplay<'_> {
219 AddressDisplay { addr: *self }
220 }
221
222 pub fn from_local_path(path: &'a Path) -> Self {
223 Self::Local(path)
224 }
225
226 pub fn new(uri: &'a str) -> Self {
227 if let Some(i) = uri.find([':', '/']) {
228 if uri[i..].starts_with("://") && CLOUD_SCHEME_REGEX.is_match(&uri[..i]) {
229 let scheme = CloudScheme::from_str(&uri[..i]).unwrap();
230 return Self::Cloud(PlCloudPathRef { scheme, uri });
231 }
232 }
233
234 Self::from_local_path(Path::new(uri))
235 }
236
237 pub fn into_owned(self) -> PlPath {
238 match self {
239 Self::Local(p) => PlPath::Local(p.into()),
240 Self::Cloud(p) => PlPath::Cloud(p.into_owned()),
241 }
242 }
243
244 pub fn strip_scheme(&self) -> &str {
245 match self {
246 Self::Local(p) => p.to_str().unwrap(),
247 Self::Cloud(p) => p.strip_scheme(),
248 }
249 }
250
251 pub fn parent(&self) -> Option<Self> {
252 Some(match self {
253 Self::Local(p) => Self::Local(p.parent()?),
254 Self::Cloud(p) => {
255 let uri = p.uri;
256 let offset_start = p.scheme.as_str().len() + 3;
257 let last_slash = uri[offset_start..]
258 .char_indices()
259 .rev()
260 .find(|(_, c)| *c == '/')?
261 .0;
262 let uri = &uri[..offset_start + last_slash];
263
264 Self::Cloud(PlCloudPathRef {
265 scheme: p.scheme,
266 uri,
267 })
268 },
269 })
270 }
271
272 pub fn extension(&self) -> Option<&str> {
273 match self {
274 Self::Local(path) => path.extension().and_then(|e| e.to_str()),
275 Self::Cloud(_) => {
276 let offset_path = self.strip_scheme();
277 let separator = '/';
278
279 let mut ext_start = None;
280 for (i, c) in offset_path.char_indices() {
281 if c == separator {
282 ext_start = None;
283 }
284
285 if c == '.' && ext_start.is_none() {
286 ext_start = Some(i);
287 }
288 }
289
290 ext_start.map(|i| &offset_path[i + 1..])
291 },
292 }
293 }
294
295 pub fn to_str(&self) -> &'a str {
296 match self {
297 Self::Local(p) => p.to_str().unwrap(),
298 Self::Cloud(p) => p.uri,
299 }
300 }
301
302 pub fn offset_bytes(&'a self, n: usize) -> PathBuf {
305 let s = self.to_str();
306 if let Some(scheme) = self.scheme()
307 && n > 0
308 {
309 debug_assert!(n >= scheme.as_str().len())
310 }
311 PathBuf::from(&s[n..])
312 }
313}
314
315impl PlPath {
316 pub fn new(uri: &str) -> Self {
317 PlPathRef::new(uri).into_owned()
318 }
319
320 pub fn display(&self) -> AddressDisplay<'_> {
321 AddressDisplay {
322 addr: match self {
323 Self::Local(p) => PlPathRef::Local(p.as_ref()),
324 Self::Cloud(p) => PlPathRef::Cloud(p.as_ref()),
325 },
326 }
327 }
328
329 pub fn is_local(&self) -> bool {
330 self.as_ref().is_local()
331 }
332
333 pub fn is_cloud_url(&self) -> bool {
334 self.as_ref().is_cloud_url()
335 }
336
337 #[expect(clippy::should_implement_trait)]
339 pub fn from_str(uri: &str) -> Self {
340 Self::new(uri)
341 }
342
343 pub fn from_string(uri: String) -> Self {
344 Self::new(&uri)
345 }
346
347 pub fn as_ref(&self) -> PlPathRef<'_> {
348 match self {
349 Self::Local(p) => PlPathRef::Local(p.as_ref()),
350 Self::Cloud(p) => PlPathRef::Cloud(p.as_ref()),
351 }
352 }
353
354 pub fn cloud_scheme(&self) -> Option<CloudScheme> {
355 match self {
356 Self::Local(_) => None,
357 Self::Cloud(p) => Some(p.scheme),
358 }
359 }
360
361 pub fn to_str(&self) -> &str {
362 match self {
363 Self::Local(p) => p.to_str().unwrap(),
364 Self::Cloud(p) => p.uri.as_ref(),
365 }
366 }
367
368 pub fn into_local_path(self) -> Option<Arc<Path>> {
369 match self {
370 PlPath::Local(path) => Some(path),
371 PlPath::Cloud(_) => None,
372 }
373 }
374}
375
#[cfg(test)]
mod tests {
    use super::*;

    /// Rewrites every `/` to the platform's main separator so that
    /// expectations written with forward slashes hold for local paths on any
    /// OS.
    fn to_native(path: &str) -> String {
        path.chars()
            .map(|c| if c == '/' { std::path::MAIN_SEPARATOR } else { c })
            .collect()
    }

    #[test]
    fn plpath_join() {
        // `base + added => local_result[, uri_result]`
        //
        // Checks the join once as a local path (native separators) and once
        // as a `file://` URI. `uri_result` overrides the expected URI tail
        // when the textual cloud join differs from `Path::join`.
        macro_rules! assert_plpath_join {
            ($base:literal + $added:literal => $result:literal$(, $uri_result:literal)?) => {
                let joined_local = PlPath::new(&to_native($base))
                    .as_ref()
                    .join(to_native($added));
                assert_eq!(joined_local.to_str(), to_native($result));

                // The last candidate wins: the explicit URI expectation if
                // one was given, otherwise the local expectation.
                let uri_tails = [$result $(, $uri_result)?];
                let expected_uri = format!("file://{}", uri_tails[uri_tails.len() - 1]);
                let joined_uri = PlPath::new(&format!("file://{}", $base))
                    .as_ref()
                    .join($added);
                assert_eq!(joined_uri.to_str(), expected_uri.as_str());
            };
        }

        assert_plpath_join!("a/b/c/" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e" => "a/b/c/d/e");
        assert_plpath_join!("a/b/c" + "d/e/" => "a/b/c/d/e/");
        assert_plpath_join!("a/b/c" + "" => "a/b/c");
        assert_plpath_join!("a/b/c" + "/d" => "/d", "a/b/c/d");
        assert_plpath_join!("a/b/c" + "/d/" => "/d/", "a/b/c/d/");
        assert_plpath_join!("" + "/d/" => "/d/");
        assert_plpath_join!("/" + "/d/" => "/d/", "//d/");
        assert_plpath_join!("/x/y" + "/d/" => "/d/", "/x/y/d/");
        assert_plpath_join!("/x/y" + "/d" => "/d", "/x/y/d");
        assert_plpath_join!("/x/y" + "d" => "/x/y/d");

        assert_plpath_join!("/a/longer" + "path" => "/a/longer/path");
        assert_plpath_join!("/a/longer" + "/path" => "/path", "/a/longer/path");
        assert_plpath_join!("/a/longer" + "path/wow" => "/a/longer/path/wow");
        assert_plpath_join!("/a/longer" + "/path/wow" => "/path/wow", "/a/longer/path/wow");
        assert_plpath_join!("/an/even/longer" + "path" => "/an/even/longer/path");
        assert_plpath_join!("/an/even/longer" + "/path" => "/path", "/an/even/longer/path");
        assert_plpath_join!("/an/even/longer" + "path/wow" => "/an/even/longer/path/wow");
        assert_plpath_join!("/an/even/longer" + "/path/wow" => "/path/wow", "/an/even/longer/path/wow");
    }
}