polars_sql/
functions.rs

1use std::ops::{Add, Sub};
2
3use polars_core::chunked_array::ops::{SortMultipleOptions, SortOptions};
4use polars_core::prelude::{
5    DataType, ExplodeOptions, PolarsResult, QuantileMethod, Schema, TimeUnit, polars_bail,
6    polars_err,
7};
8use polars_lazy::dsl::Expr;
9#[cfg(feature = "rank")]
10use polars_lazy::prelude::{RankMethod, RankOptions};
11use polars_ops::chunked_array::UnicodeForm;
12use polars_ops::series::RoundMode;
13use polars_plan::dsl::{
14    as_struct, coalesce, concat_str, element, int_range, len, max_horizontal, min_horizontal, when,
15};
16use polars_plan::plans::{DynLiteralValue, LiteralValue, typed_lit};
17use polars_plan::prelude::{StrptimeOptions, col, cols, lit};
18use polars_utils::pl_str::PlSmallStr;
19use sqlparser::ast::helpers::attached_token::AttachedToken;
20use sqlparser::ast::{
21    DateTimeField, DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg,
22    FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, Ident,
23    OrderByExpr, Value as SQLValue, ValueWithSpan, WindowFrame, WindowFrameBound, WindowFrameUnits,
24    WindowSpec, WindowType,
25};
26use sqlparser::tokenizer::Span;
27
28use crate::SQLContext;
29use crate::sql_expr::{adjust_one_indexed_param, parse_extract_date_part, parse_sql_expr};
30
31pub(crate) struct SQLFunctionVisitor<'a> {
32    pub(crate) func: &'a SQLFunction,
33    pub(crate) ctx: &'a mut SQLContext,
34    pub(crate) active_schema: Option<&'a Schema>,
35}
36
37/// SQL functions that are supported by Polars
38pub(crate) enum PolarsSQLFunctions {
39    // ----
40    // Bitwise functions
41    // ----
42    /// SQL 'bit_and' function.
43    /// Returns the bitwise AND of the input expressions.
44    /// ```sql
45    /// SELECT BIT_AND(col1, col2) FROM df;
46    /// ```
47    BitAnd,
48    /// SQL 'bit_count' function.
49    /// Returns the number of set bits in the input expression.
50    /// ```sql
51    /// SELECT BIT_COUNT(col1) FROM df;
52    /// ```
53    #[cfg(feature = "bitwise")]
54    BitCount,
55    /// SQL 'bit_or' function.
56    /// Returns the bitwise OR of the input expressions.
57    /// ```sql
58    /// SELECT BIT_OR(col1, col2) FROM df;
59    /// ```
60    BitNot,
61    /// SQL 'bit_not' function.
62    /// Returns the bitwise Not of the input expression.
63    /// ```sql
64    /// SELECT BIT_Not(col1) FROM df;
65    /// ```
66    BitOr,
67    /// SQL 'bit_xor' function.
68    /// Returns the bitwise XOR of the input expressions.
69    /// ```sql
70    /// SELECT BIT_XOR(col1, col2) FROM df;
71    /// ```
72    BitXor,
73
74    // ----
75    // Math functions
76    // ----
77    /// SQL 'abs' function.
78    /// Returns the absolute value of the input expression.
79    /// ```sql
80    /// SELECT ABS(col1) FROM df;
81    /// ```
82    Abs,
83    /// SQL 'ceil' function.
84    /// Returns the nearest integer closest from zero.
85    /// ```sql
86    /// SELECT CEIL(col1) FROM df;
87    /// ```
88    Ceil,
89    /// SQL 'div' function.
90    /// Returns the integer quotient of the division.
91    /// ```sql
92    /// SELECT DIV(col1, 2) FROM df;
93    /// ```
94    Div,
95    /// SQL 'exp' function.
96    /// Computes the exponential of the given value.
97    /// ```sql
98    /// SELECT EXP(col1) FROM df;
99    /// ```
100    Exp,
101    /// SQL 'floor' function.
102    /// Returns the nearest integer away from zero.
103    ///   0.5 will be rounded
104    /// ```sql
105    /// SELECT FLOOR(col1) FROM df;
106    /// ```
107    Floor,
108    /// SQL 'pi' function.
109    /// Returns a (very good) approximation of 𝜋.
110    /// ```sql
111    /// SELECT PI() FROM df;
112    /// ```
113    Pi,
114    /// SQL 'ln' function.
115    /// Computes the natural logarithm of the given value.
116    /// ```sql
117    /// SELECT LN(col1) FROM df;
118    /// ```
119    Ln,
120    /// SQL 'log2' function.
121    /// Computes the logarithm of the given value in base 2.
122    /// ```sql
123    /// SELECT LOG2(col1) FROM df;
124    /// ```
125    Log2,
126    /// SQL 'log10' function.
127    /// Computes the logarithm of the given value in base 10.
128    /// ```sql
129    /// SELECT LOG10(col1) FROM df;
130    /// ```
131    Log10,
132    /// SQL 'log' function.
133    /// Computes the `base` logarithm of the given value.
134    /// ```sql
135    /// SELECT LOG(col1, 10) FROM df;
136    /// ```
137    Log,
138    /// SQL 'log1p' function.
139    /// Computes the natural logarithm of "given value plus one".
140    /// ```sql
141    /// SELECT LOG1P(col1) FROM df;
142    /// ```
143    Log1p,
144    /// SQL 'pow' function.
145    /// Returns the value to the power of the given exponent.
146    /// ```sql
147    /// SELECT POW(col1, 2) FROM df;
148    /// ```
149    Pow,
150    /// SQL 'mod' function.
151    /// Returns the remainder of a numeric expression divided by another numeric expression.
152    /// ```sql
153    /// SELECT MOD(col1, 2) FROM df;
154    /// ```
155    Mod,
156    /// SQL 'sqrt' function.
157    /// Returns the square root (√) of a number.
158    /// ```sql
159    /// SELECT SQRT(col1) FROM df;
160    /// ```
161    Sqrt,
162    /// SQL 'cbrt' function.
163    /// Returns the cube root (∛) of a number.
164    /// ```sql
165    /// SELECT CBRT(col1) FROM df;
166    /// ```
167    Cbrt,
168    /// SQL 'round' function.
169    /// Round a number to `x` decimals (default: 0) away from zero.
170    ///   .5 is rounded away from zero.
171    /// ```sql
172    /// SELECT ROUND(col1, 3) FROM df;
173    /// ```
174    Round,
175    /// SQL 'sign' function.
176    /// Returns the sign of the argument as -1, 0, or +1.
177    /// ```sql
178    /// SELECT SIGN(col1) FROM df;
179    /// ```
180    Sign,
181
182    // ----
183    // Trig functions
184    // ----
185    /// SQL 'cos' function.
186    /// Compute the cosine sine of the input expression (in radians).
187    /// ```sql
188    /// SELECT COS(col1) FROM df;
189    /// ```
190    Cos,
191    /// SQL 'cot' function.
192    /// Compute the cotangent of the input expression (in radians).
193    /// ```sql
194    /// SELECT COT(col1) FROM df;
195    /// ```
196    Cot,
197    /// SQL 'sin' function.
198    /// Compute the sine of the input expression (in radians).
199    /// ```sql
200    /// SELECT SIN(col1) FROM df;
201    /// ```
202    Sin,
203    /// SQL 'tan' function.
204    /// Compute the tangent of the input expression (in radians).
205    /// ```sql
206    /// SELECT TAN(col1) FROM df;
207    /// ```
208    Tan,
209    /// SQL 'cosd' function.
210    /// Compute the cosine sine of the input expression (in degrees).
211    /// ```sql
212    /// SELECT COSD(col1) FROM df;
213    /// ```
214    CosD,
215    /// SQL 'cotd' function.
216    /// Compute cotangent of the input expression (in degrees).
217    /// ```sql
218    /// SELECT COTD(col1) FROM df;
219    /// ```
220    CotD,
221    /// SQL 'sind' function.
222    /// Compute the sine of the input expression (in degrees).
223    /// ```sql
224    /// SELECT SIND(col1) FROM df;
225    /// ```
226    SinD,
227    /// SQL 'tand' function.
228    /// Compute the tangent of the input expression (in degrees).
229    /// ```sql
230    /// SELECT TAND(col1) FROM df;
231    /// ```
232    TanD,
233    /// SQL 'acos' function.
234    /// Compute inverse cosine of the input expression (in radians).
235    /// ```sql
236    /// SELECT ACOS(col1) FROM df;
237    /// ```
238    Acos,
239    /// SQL 'asin' function.
240    /// Compute inverse sine of the input expression (in radians).
241    /// ```sql
242    /// SELECT ASIN(col1) FROM df;
243    /// ```
244    Asin,
245    /// SQL 'atan' function.
246    /// Compute inverse tangent of the input expression (in radians).
247    /// ```sql
248    /// SELECT ATAN(col1) FROM df;
249    /// ```
250    Atan,
251    /// SQL 'atan2' function.
252    /// Compute the inverse tangent of col1/col2 (in radians).
253    /// ```sql
254    /// SELECT ATAN2(col1, col2) FROM df;
255    /// ```
256    Atan2,
257    /// SQL 'acosd' function.
258    /// Compute inverse cosine of the input expression (in degrees).
259    /// ```sql
260    /// SELECT ACOSD(col1) FROM df;
261    /// ```
262    AcosD,
263    /// SQL 'asind' function.
264    /// Compute inverse sine of the input expression (in degrees).
265    /// ```sql
266    /// SELECT ASIND(col1) FROM df;
267    /// ```
268    AsinD,
269    /// SQL 'atand' function.
270    /// Compute inverse tangent of the input expression (in degrees).
271    /// ```sql
272    /// SELECT ATAND(col1) FROM df;
273    /// ```
274    AtanD,
275    /// SQL 'atan2d' function.
276    /// Compute the inverse tangent of col1/col2 (in degrees).
277    /// ```sql
278    /// SELECT ATAN2D(col1) FROM df;
279    /// ```
280    Atan2D,
281    /// SQL 'degrees' function.
282    /// Convert between radians and degrees.
283    /// ```sql
284    /// SELECT DEGREES(col1) FROM df;
285    /// ```
286    ///
287    ///
288    Degrees,
289    /// SQL 'RADIANS' function.
290    /// Convert between degrees and radians.
291    /// ```sql
292    /// SELECT RADIANS(col1) FROM df;
293    /// ```
294    Radians,
295
296    // ----
297    // Temporal functions
298    // ----
299    /// SQL 'date_part' function.
300    /// Extracts a part of a date (or datetime) such as 'year', 'month', etc.
301    /// ```sql
302    /// SELECT DATE_PART('year', col1) FROM df;
303    /// SELECT DATE_PART('day', col1) FROM df;
304    DatePart,
305    /// SQL 'strftime' function.
306    /// Converts a datetime to a string using a format string.
307    /// ```sql
308    /// SELECT STRFTIME(col1, '%d-%m-%Y %H:%M') FROM df;
309    /// ```
310    Strftime,
311
312    // ----
313    // String functions
314    // ----
315    /// SQL 'bit_length' function (bytes).
316    /// ```sql
317    /// SELECT BIT_LENGTH(col1) FROM df;
318    /// ```
319    BitLength,
320    /// SQL 'concat' function.
321    /// Returns all input expressions concatenated together as a string.
322    /// ```sql
323    /// SELECT CONCAT(col1, col2) FROM df;
324    /// ```
325    Concat,
326    /// SQL 'concat_ws' function.
327    /// Returns all input expressions concatenated together
328    /// (and interleaved with a separator) as a string.
329    /// ```sql
330    /// SELECT CONCAT_WS(':', col1, col2, col3) FROM df;
331    /// ```
332    ConcatWS,
333    /// SQL 'date' function.
334    /// Converts a formatted string date to an actual Date type; ISO-8601 format is assumed
335    /// unless a strftime-compatible formatting string is provided as the second parameter.
336    /// ```sql
337    /// SELECT DATE('2021-03-15') FROM df;
338    /// SELECT DATE('2021-15-03', '%Y-d%-%m') FROM df;
339    /// SELECT DATE('2021-03', '%Y-%m') FROM df;
340    /// ```
341    Date,
342    /// SQL 'ends_with' function.
343    /// Returns True if the value ends with the second argument.
344    /// ```sql
345    /// SELECT ENDS_WITH(col1, 'a') FROM df;
346    /// SELECT col2 from df WHERE ENDS_WITH(col1, 'a');
347    /// ```
348    EndsWith,
349    /// SQL 'initcap' function.
350    /// Returns the value with the first letter capitalized.
351    /// ```sql
352    /// SELECT INITCAP(col1) FROM df;
353    /// ```
354    #[cfg(feature = "nightly")]
355    InitCap,
356    /// SQL 'left' function.
357    /// Returns the first (leftmost) `n` characters.
358    /// ```sql
359    /// SELECT LEFT(col1, 3) FROM df;
360    /// ```
361    Left,
362    /// SQL 'length' function (characters.
363    /// Returns the character length of the string.
364    /// ```sql
365    /// SELECT LENGTH(col1) FROM df;
366    /// ```
367    Length,
368    /// SQL 'lower' function.
369    /// Returns an lowercased column.
370    /// ```sql
371    /// SELECT LOWER(col1) FROM df;
372    /// ```
373    Lower,
374    /// SQL 'ltrim' function.
375    /// Strip whitespaces from the left.
376    /// ```sql
377    /// SELECT LTRIM(col1) FROM df;
378    /// ```
379    LTrim,
380    /// SQL 'normalize' function.
381    /// Convert string to Unicode normalization form
382    /// (one of NFC, NFKC, NFD, or NFKD - unquoted).
383    /// ```sql
384    /// SELECT NORMALIZE(col1, NFC) FROM df;
385    /// ```
386    Normalize,
387    /// SQL 'octet_length' function.
388    /// Returns the length of a given string in bytes.
389    /// ```sql
390    /// SELECT OCTET_LENGTH(col1) FROM df;
391    /// ```
392    OctetLength,
393    /// SQL 'regexp_like' function.
394    /// True if `pattern` matches the value (optional: `flags`).
395    /// ```sql
396    /// SELECT REGEXP_LIKE(col1, 'xyz', 'i') FROM df;
397    /// ```
398    RegexpLike,
399    /// SQL 'replace' function.
400    /// Replace a given substring with another string.
401    /// ```sql
402    /// SELECT REPLACE(col1, 'old', 'new') FROM df;
403    /// ```
404    Replace,
405    /// SQL 'reverse' function.
406    /// Return the reversed string.
407    /// ```sql
408    /// SELECT REVERSE(col1) FROM df;
409    /// ```
410    Reverse,
411    /// SQL 'right' function.
412    /// Returns the last (rightmost) `n` characters.
413    /// ```sql
414    /// SELECT RIGHT(col1, 3) FROM df;
415    /// ```
416    Right,
417    /// SQL 'rtrim' function.
418    /// Strip whitespaces from the right.
419    /// ```sql
420    /// SELECT RTRIM(col1) FROM df;
421    /// ```
422    RTrim,
423    /// SQL 'split_part' function.
424    /// Splits a string into an array of strings using the given delimiter
425    /// and returns the `n`-th part (1-indexed).
426    /// ```sql
427    /// SELECT SPLIT_PART(col1, ',', 2) FROM df;
428    /// ```
429    SplitPart,
430    /// SQL 'starts_with' function.
431    /// Returns True if the value starts with the second argument.
432    /// ```sql
433    /// SELECT STARTS_WITH(col1, 'a') FROM df;
434    /// SELECT col2 from df WHERE STARTS_WITH(col1, 'a');
435    /// ```
436    StartsWith,
437    /// SQL 'strpos' function.
438    /// Returns the index of the given substring in the target string.
439    /// ```sql
440    /// SELECT STRPOS(col1,'xyz') FROM df;
441    /// ```
442    StrPos,
443    /// SQL 'substr' function.
444    /// Returns a portion of the data (first character = 1) in the range.
445    ///   \[start, start + length]
446    /// ```sql
447    /// SELECT SUBSTR(col1, 3, 5) FROM df;
448    /// ```
449    Substring,
450    /// SQL 'string_to_array' function.
451    /// Splits a string into an array of strings using the given delimiter.
452    /// ```sql
453    /// SELECT STRING_TO_ARRAY(col1, ',') FROM df;
454    /// ```
455    StringToArray,
456    /// SQL 'strptime' function.
457    /// Converts a string to a datetime using a format string.
458    /// ```sql
459    /// SELECT STRPTIME(col1, '%d-%m-%Y %H:%M') FROM df;
460    /// ```
461    Strptime,
462    /// SQL 'time' function.
463    /// Converts a formatted string time to an actual Time type; ISO-8601 format is
464    /// assumed unless a strftime-compatible formatting string is provided as the second
465    /// parameter.
466    /// ```sql
467    /// SELECT TIME('10:30:45') FROM df;
468    /// SELECT TIME('20.30', '%H.%M') FROM df;
469    /// ```
470    Time,
471    /// SQL 'timestamp' function.
472    /// Converts a formatted string datetime to an actual Datetime type; ISO-8601 format is
473    /// assumed unless a strftime-compatible formatting string is provided as the second
474    /// parameter.
475    /// ```sql
476    /// SELECT TIMESTAMP('2021-03-15 10:30:45') FROM df;
477    /// SELECT TIMESTAMP('2021-15-03T00:01:02.333', '%Y-d%-%m %H:%M:%S') FROM df;
478    /// ```
479    Timestamp,
480    /// SQL 'upper' function.
481    /// Returns an uppercased column.
482    /// ```sql
483    /// SELECT UPPER(col1) FROM df;
484    /// ```
485    Upper,
486
487    // ----
488    // Conditional functions
489    // ----
490    /// SQL 'coalesce' function.
491    /// Returns the first non-null value in the provided values/columns.
492    /// ```sql
493    /// SELECT COALESCE(col1, ...) FROM df;
494    /// ```
495    Coalesce,
496    /// SQL 'greatest' function.
497    /// Returns the greatest value in the list of expressions.
498    /// ```sql
499    /// SELECT GREATEST(col1, col2, ...) FROM df;
500    /// ```
501    Greatest,
502    /// SQL 'if' function.
503    /// Returns expr1 if the boolean condition provided as the first
504    /// parameter evaluates to true, and expr2 otherwise.
505    /// ```sql
506    /// SELECT IF(column < 0, expr1, expr2) FROM df;
507    /// ```
508    If,
509    /// SQL 'ifnull' function.
510    /// If an expression value is NULL, return an alternative value.
511    /// ```sql
512    /// SELECT IFNULL(string_col, 'n/a') FROM df;
513    /// ```
514    IfNull,
515    /// SQL 'least' function.
516    /// Returns the smallest value in the list of expressions.
517    /// ```sql
518    /// SELECT LEAST(col1, col2, ...) FROM df;
519    /// ```
520    Least,
521    /// SQL 'nullif' function.
522    /// Returns NULL if two expressions are equal, otherwise returns the first.
523    /// ```sql
524    /// SELECT NULLIF(col1, col2) FROM df;
525    /// ```
526    NullIf,
527
528    // ----
529    // Aggregate functions
530    // ----
531    /// SQL 'avg' function.
532    /// Returns the average (mean) of all the elements in the grouping.
533    /// ```sql
534    /// SELECT AVG(col1) FROM df;
535    /// ```
536    Avg,
537    /// SQL 'corr' function.
538    /// Returns the Pearson correlation coefficient between two columns.
539    /// ```sql
540    /// SELECT CORR(col1, col2) FROM df;
541    /// ```
542    Corr,
543    /// SQL 'count' function.
544    /// Returns the amount of elements in the grouping.
545    /// ```sql
546    /// SELECT COUNT(col1) FROM df;
547    /// SELECT COUNT(*) FROM df;
548    /// SELECT COUNT(DISTINCT col1) FROM df;
549    /// SELECT COUNT(DISTINCT *) FROM df;
550    /// ```
551    Count,
552    /// SQL 'covar_pop' function.
553    /// Returns the population covariance between two columns.
554    /// ```sql
555    /// SELECT COVAR_POP(col1, col2) FROM df;
556    /// ```
557    CovarPop,
558    /// SQL 'covar_samp' function.
559    /// Returns the sample covariance between two columns.
560    /// ```sql
561    /// SELECT COVAR_SAMP(col1, col2) FROM df;
562    /// ```
563    CovarSamp,
564    /// SQL 'first' function.
565    /// Returns the first element of the grouping.
566    /// ```sql
567    /// SELECT FIRST(col1) FROM df;
568    /// ```
569    First,
570    /// SQL 'last' function.
571    /// Returns the last element of the grouping.
572    /// ```sql
573    /// SELECT LAST(col1) FROM df;
574    /// ```
575    Last,
576    /// SQL 'max' function.
577    /// Returns the greatest (maximum) of all the elements in the grouping.
578    /// ```sql
579    /// SELECT MAX(col1) FROM df;
580    /// ```
581    Max,
582    /// SQL 'median' function.
583    /// Returns the median element from the grouping.
584    /// ```sql
585    /// SELECT MEDIAN(col1) FROM df;
586    /// ```
587    Median,
588    /// SQL 'quantile_cont' function.
589    /// Returns the continuous quantile element from the grouping
590    /// (interpolated value between two closest values).
591    /// ```sql
592    /// SELECT QUANTILE_CONT(col1) FROM df;
593    /// ```
594    QuantileCont,
595    /// SQL 'quantile_disc' function.
596    /// Divides the [0, 1] interval into equal-length subintervals, each corresponding to a value,
597    /// and returns the value associated with the subinterval where the quantile value falls.
598    /// ```sql
599    /// SELECT QUANTILE_DISC(col1) FROM df;
600    /// ```
601    QuantileDisc,
602    /// SQL 'min' function.
603    /// Returns the smallest (minimum) of all the elements in the grouping.
604    /// ```sql
605    /// SELECT MIN(col1) FROM df;
606    /// ```
607    Min,
608    /// SQL 'stddev' function.
609    /// Returns the standard deviation of all the elements in the grouping.
610    /// ```sql
611    /// SELECT STDDEV(col1) FROM df;
612    /// ```
613    StdDev,
614    /// SQL 'sum' function.
615    /// Returns the sum of all the elements in the grouping.
616    /// ```sql
617    /// SELECT SUM(col1) FROM df;
618    /// ```
619    Sum,
620    /// SQL 'variance' function.
621    /// Returns the variance of all the elements in the grouping.
622    /// ```sql
623    /// SELECT VARIANCE(col1) FROM df;
624    /// ```
625    Variance,
626
627    // ----
628    // Array functions
629    // ----
630    /// SQL 'array_length' function.
631    /// Returns the length of the array.
632    /// ```sql
633    /// SELECT ARRAY_LENGTH(col1) FROM df;
634    /// ```
635    ArrayLength,
636    /// SQL 'array_lower' function.
637    /// Returns the minimum value in an array; equivalent to `array_min`.
638    /// ```sql
639    /// SELECT ARRAY_LOWER(col1) FROM df;
640    /// ```
641    ArrayMin,
642    /// SQL 'array_upper' function.
643    /// Returns the maximum value in an array; equivalent to `array_max`.
644    /// ```sql
645    /// SELECT ARRAY_UPPER(col1) FROM df;
646    /// ```
647    ArrayMax,
648    /// SQL 'array_sum' function.
649    /// Returns the sum of all values in an array.
650    /// ```sql
651    /// SELECT ARRAY_SUM(col1) FROM df;
652    /// ```
653    ArraySum,
654    /// SQL 'array_mean' function.
655    /// Returns the mean of all values in an array.
656    /// ```sql
657    /// SELECT ARRAY_MEAN(col1) FROM df;
658    /// ```
659    ArrayMean,
660    /// SQL 'array_reverse' function.
661    /// Returns the array with the elements in reverse order.
662    /// ```sql
663    /// SELECT ARRAY_REVERSE(col1) FROM df;
664    /// ```
665    ArrayReverse,
666    /// SQL 'array_unique' function.
667    /// Returns the array with the unique elements.
668    /// ```sql
669    /// SELECT ARRAY_UNIQUE(col1) FROM df;
670    /// ```
671    ArrayUnique,
672    /// SQL 'unnest' function.
673    /// Unnest/explodes an array column into multiple rows.
674    /// ```sql
675    /// SELECT unnest(col1) FROM df;
676    /// ```
677    Explode,
678    /// SQL 'array_agg' function.
679    /// Concatenates the input expressions, including nulls, into an array.
680    /// ```sql
681    /// SELECT ARRAY_AGG(col1, col2, ...) FROM df;
682    /// ```
683    ArrayAgg,
684    /// SQL 'array_to_string' function.
685    /// Takes all elements of the array and joins them into one string.
686    /// ```sql
687    /// SELECT ARRAY_TO_STRING(col1, ',') FROM df;
688    /// SELECT ARRAY_TO_STRING(col1, ',', 'n/a') FROM df;
689    /// ```
690    ArrayToString,
691    /// SQL 'array_get' function.
692    /// Returns the value at the given index in the array.
693    /// ```sql
694    /// SELECT ARRAY_GET(col1, 1) FROM df;
695    /// ```
696    ArrayGet,
697    /// SQL 'array_contains' function.
698    /// Returns true if the array contains the value.
699    /// ```sql
700    /// SELECT ARRAY_CONTAINS(col1, 'foo') FROM df;
701    /// ```
702    ArrayContains,
703
704    // ----
705    // Window functions
706    // ----
707    /// SQL 'first_value' window function.
708    /// Returns the first value in an ordered set of values (respecting window frame).
709    /// ```sql
710    /// SELECT FIRST_VALUE(col1) OVER (PARTITION BY category ORDER BY id) FROM df;
711    /// ```
712    FirstValue,
713    /// SQL 'last_value' window function.
714    /// Returns the last value in an ordered set of values (respecting window frame).
715    /// With default frame, returns the current row's value.
716    /// ```sql
717    /// SELECT LAST_VALUE(col1) OVER (PARTITION BY category ORDER BY id) FROM df;
718    /// ```
719    LastValue,
720    /// SQL 'lag' function.
721    /// Returns the value of the expression evaluated at the row n rows before the current row.
722    /// ```sql
723    /// SELECT lag(column_1, 1) OVER (PARTITION BY column_2 ORDER BY column_3) FROM df;
724    /// ```
725    Lag,
726    /// SQL 'lead' function.
727    /// Returns the value of the expression evaluated at the row n rows after the current row.
728    /// ```sql
729    /// SELECT lead(column_1, 1) OVER (PARTITION BY column_2 ORDER BY column_3) FROM df;
730    /// ```
731    Lead,
732    /// SQL 'row_number' function.
733    /// Returns the sequential row number within a window partition, starting from 1.
734    /// ```sql
735    /// SELECT ROW_NUMBER() OVER (ORDER BY col1) FROM df;
736    /// SELECT ROW_NUMBER() OVER (PARTITION BY col1 ORDER BY col2) FROM df;
737    /// ```
738    RowNumber,
739    /// SQL 'rank' function.
740    /// Returns the rank of each row within a window partition, with gaps for ties.
741    /// Rows with equal values receive the same rank, and the next rank skips numbers.
742    /// ```sql
743    /// SELECT RANK() OVER (ORDER BY col1) FROM df;
744    /// SELECT RANK() OVER (PARTITION BY col1 ORDER BY col2 DESC) FROM df;
745    /// ```
746    #[cfg(feature = "rank")]
747    Rank,
748    /// SQL 'dense_rank' function.
749    /// Returns the rank of each row within a window partition, without gaps for ties.
750    /// Rows with equal values receive the same rank, and the next rank is consecutive.
751    /// ```sql
752    /// SELECT DENSE_RANK() OVER (ORDER BY col1) FROM df;
753    /// SELECT DENSE_RANK() OVER (PARTITION BY col1 ORDER BY col2 DESC) FROM df;
754    /// ```
755    #[cfg(feature = "rank")]
756    DenseRank,
757
758    // ----
759    // Column selection
760    // ----
761    Columns,
762
763    // ----
764    // User-defined
765    // ----
766    Udf(String),
767}
768
769impl PolarsSQLFunctions {
770    pub(crate) fn keywords() -> &'static [&'static str] {
771        &[
772            "abs",
773            "acos",
774            "acosd",
775            "array_contains",
776            "array_get",
777            "array_length",
778            "array_lower",
779            "array_mean",
780            "array_reverse",
781            "array_sum",
782            "array_to_string",
783            "array_unique",
784            "array_upper",
785            "asin",
786            "asind",
787            "atan",
788            "atan2",
789            "atan2d",
790            "atand",
791            "avg",
792            "bit_and",
793            "bit_count",
794            "bit_length",
795            "bit_or",
796            "bit_xor",
797            "cbrt",
798            "ceil",
799            "ceiling",
800            "char_length",
801            "character_length",
802            "coalesce",
803            "columns",
804            "concat",
805            "concat_ws",
806            "corr",
807            "cos",
808            "cosd",
809            "cot",
810            "cotd",
811            "count",
812            "covar",
813            "covar_pop",
814            "covar_samp",
815            "date",
816            "date_part",
817            "degrees",
818            "dense_rank",
819            "ends_with",
820            "exp",
821            "first",
822            "first_value",
823            "floor",
824            "greatest",
825            "if",
826            "ifnull",
827            "initcap",
828            "lag",
829            "last",
830            "last_value",
831            "lead",
832            "least",
833            "left",
834            "length",
835            "ln",
836            "log",
837            "log10",
838            "log1p",
839            "log2",
840            "lower",
841            "ltrim",
842            "max",
843            "median",
844            "quantile_disc",
845            "min",
846            "mod",
847            "nullif",
848            "octet_length",
849            "pi",
850            "pow",
851            "power",
852            "quantile_cont",
853            "quantile_disc",
854            "radians",
855            "rank",
856            "regexp_like",
857            "replace",
858            "reverse",
859            "right",
860            "round",
861            "row_number",
862            "rtrim",
863            "sign",
864            "sin",
865            "sind",
866            "sqrt",
867            "starts_with",
868            "stddev",
869            "stddev_samp",
870            "stdev",
871            "stdev_samp",
872            "strftime",
873            "strpos",
874            "strptime",
875            "substr",
876            "sum",
877            "tan",
878            "tand",
879            "unnest",
880            "upper",
881            "var",
882            "var_samp",
883            "variance",
884        ]
885    }
886}
887
888impl PolarsSQLFunctions {
889    fn try_from_sql(function: &'_ SQLFunction, ctx: &'_ SQLContext) -> PolarsResult<Self> {
890        let function_name = function.name.0[0].as_ident().unwrap().value.to_lowercase();
891        Ok(match function_name.as_str() {
892            // ----
893            // Bitwise functions
894            // ----
895            "bit_and" | "bitand" => Self::BitAnd,
896            #[cfg(feature = "bitwise")]
897            "bit_count" | "bitcount" => Self::BitCount,
898            "bit_not" | "bitnot" => Self::BitNot,
899            "bit_or" | "bitor" => Self::BitOr,
900            "bit_xor" | "bitxor" | "xor" => Self::BitXor,
901
902            // ----
903            // Math functions
904            // ----
905            "abs" => Self::Abs,
906            "cbrt" => Self::Cbrt,
907            "ceil" | "ceiling" => Self::Ceil,
908            "div" => Self::Div,
909            "exp" => Self::Exp,
910            "floor" => Self::Floor,
911            "ln" => Self::Ln,
912            "log" => Self::Log,
913            "log10" => Self::Log10,
914            "log1p" => Self::Log1p,
915            "log2" => Self::Log2,
916            "mod" => Self::Mod,
917            "pi" => Self::Pi,
918            "pow" | "power" => Self::Pow,
919            "round" => Self::Round,
920            "sign" => Self::Sign,
921            "sqrt" => Self::Sqrt,
922
923            // ----
924            // Trig functions
925            // ----
926            "cos" => Self::Cos,
927            "cot" => Self::Cot,
928            "sin" => Self::Sin,
929            "tan" => Self::Tan,
930            "cosd" => Self::CosD,
931            "cotd" => Self::CotD,
932            "sind" => Self::SinD,
933            "tand" => Self::TanD,
934            "acos" => Self::Acos,
935            "asin" => Self::Asin,
936            "atan" => Self::Atan,
937            "atan2" => Self::Atan2,
938            "acosd" => Self::AcosD,
939            "asind" => Self::AsinD,
940            "atand" => Self::AtanD,
941            "atan2d" => Self::Atan2D,
942            "degrees" => Self::Degrees,
943            "radians" => Self::Radians,
944
945            // ----
946            // Conditional functions
947            // ----
948            "coalesce" => Self::Coalesce,
949            "greatest" => Self::Greatest,
950            "if" => Self::If,
951            "ifnull" => Self::IfNull,
952            "least" => Self::Least,
953            "nullif" => Self::NullIf,
954
955            // ----
956            // Date functions
957            // ----
958            "date_part" => Self::DatePart,
959            "strftime" => Self::Strftime,
960
961            // ----
962            // String functions
963            // ----
964            "bit_length" => Self::BitLength,
965            "concat" => Self::Concat,
966            "concat_ws" => Self::ConcatWS,
967            "date" => Self::Date,
968            "timestamp" | "datetime" => Self::Timestamp,
969            "ends_with" => Self::EndsWith,
970            #[cfg(feature = "nightly")]
971            "initcap" => Self::InitCap,
972            "length" | "char_length" | "character_length" => Self::Length,
973            "left" => Self::Left,
974            "lower" => Self::Lower,
975            "ltrim" => Self::LTrim,
976            "normalize" => Self::Normalize,
977            "octet_length" => Self::OctetLength,
978            "strpos" => Self::StrPos,
979            "regexp_like" => Self::RegexpLike,
980            "replace" => Self::Replace,
981            "reverse" => Self::Reverse,
982            "right" => Self::Right,
983            "rtrim" => Self::RTrim,
984            "split_part" => Self::SplitPart,
985            "starts_with" => Self::StartsWith,
986            "string_to_array" => Self::StringToArray,
987            "strptime" => Self::Strptime,
988            "substr" => Self::Substring,
989            "time" => Self::Time,
990            "upper" => Self::Upper,
991
992            // ----
993            // Aggregate functions
994            // ----
995            "avg" => Self::Avg,
996            "corr" => Self::Corr,
997            "count" => Self::Count,
998            "covar_pop" => Self::CovarPop,
999            "covar" | "covar_samp" => Self::CovarSamp,
1000            "first" => Self::First,
1001            "last" => Self::Last,
1002            "max" => Self::Max,
1003            "median" => Self::Median,
1004            "quantile_cont" => Self::QuantileCont,
1005            "quantile_disc" => Self::QuantileDisc,
1006            "min" => Self::Min,
1007            "stdev" | "stddev" | "stdev_samp" | "stddev_samp" => Self::StdDev,
1008            "sum" => Self::Sum,
1009            "var" | "variance" | "var_samp" => Self::Variance,
1010
1011            // ----
1012            // Array functions
1013            // ----
1014            "array_agg" => Self::ArrayAgg,
1015            "array_contains" => Self::ArrayContains,
1016            "array_get" => Self::ArrayGet,
1017            "array_length" => Self::ArrayLength,
1018            "array_lower" => Self::ArrayMin,
1019            "array_mean" => Self::ArrayMean,
1020            "array_reverse" => Self::ArrayReverse,
1021            "array_sum" => Self::ArraySum,
1022            "array_to_string" => Self::ArrayToString,
1023            "array_unique" => Self::ArrayUnique,
1024            "array_upper" => Self::ArrayMax,
1025            "unnest" => Self::Explode,
1026
1027            // ----
1028            // Window functions
1029            // ----
1030            #[cfg(feature = "rank")]
1031            "dense_rank" => Self::DenseRank,
1032            "first_value" => Self::FirstValue,
1033            "last_value" => Self::LastValue,
1034            "lag" => Self::Lag,
1035            "lead" => Self::Lead,
1036            #[cfg(feature = "rank")]
1037            "rank" => Self::Rank,
1038            "row_number" => Self::RowNumber,
1039
1040            // ----
1041            // Column selection
1042            // ----
1043            "columns" => Self::Columns,
1044
1045            other => {
1046                if ctx.function_registry.contains(other) {
1047                    Self::Udf(other.to_string())
1048                } else {
1049                    polars_bail!(SQLInterface: "unsupported function '{}'", other);
1050                }
1051            },
1052        })
1053    }
1054}
1055
1056impl SQLFunctionVisitor<'_> {
1057    pub(crate) fn visit_function(&mut self) -> PolarsResult<Expr> {
1058        use PolarsSQLFunctions::*;
1059        use polars_lazy::prelude::Literal;
1060
1061        let function_name = PolarsSQLFunctions::try_from_sql(self.func, self.ctx)?;
1062        let function = self.func;
1063
1064        // TODO: implement the following functions where possible
1065        if !function.within_group.is_empty() {
1066            polars_bail!(SQLInterface: "'WITHIN GROUP' is not currently supported")
1067        }
1068        if function.filter.is_some() {
1069            polars_bail!(SQLInterface: "'FILTER' is not currently supported")
1070        }
1071        if function.null_treatment.is_some() {
1072            polars_bail!(SQLInterface: "'IGNORE|RESPECT NULLS' is not currently supported")
1073        }
1074
1075        let log_with_base =
1076            |e: Expr, base: f64| e.log(LiteralValue::Dyn(DynLiteralValue::Float(base)).lit());
1077
1078        match function_name {
1079            // ----
1080            // Bitwise functions
1081            // ----
1082            BitAnd => self.visit_binary::<Expr>(Expr::and),
1083            #[cfg(feature = "bitwise")]
1084            BitCount => self.visit_unary(Expr::bitwise_count_ones),
1085            BitNot => self.visit_unary(Expr::not),
1086            BitOr => self.visit_binary::<Expr>(Expr::or),
1087            BitXor => self.visit_binary::<Expr>(Expr::xor),
1088
1089            // ----
1090            // Math functions
1091            // ----
1092            Abs => self.visit_unary(Expr::abs),
1093            Cbrt => self.visit_unary(Expr::cbrt),
1094            Ceil => self.visit_unary(Expr::ceil),
1095            Div => self.visit_binary(|e, d| e.floor_div(d).cast(DataType::Int64)),
1096            Exp => self.visit_unary(Expr::exp),
1097            Floor => self.visit_unary(Expr::floor),
1098            Ln => self.visit_unary(|e| log_with_base(e, std::f64::consts::E)),
1099            Log => self.visit_binary(Expr::log),
1100            Log10 => self.visit_unary(|e| log_with_base(e, 10.0)),
1101            Log1p => self.visit_unary(Expr::log1p),
1102            Log2 => self.visit_unary(|e| log_with_base(e, 2.0)),
1103            Pi => self.visit_nullary(Expr::pi),
1104            Mod => self.visit_binary(|e1, e2| e1 % e2),
1105            Pow => self.visit_binary::<Expr>(Expr::pow),
1106            Round => {
1107                let args = extract_args(function)?;
1108                match args.len() {
1109                    1 => self.visit_unary(|e| e.round(0, RoundMode::default())),
1110                    2 => self.try_visit_binary(|e, decimals| {
1111                        Ok(e.round(match decimals {
1112                            Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) => {
1113                                if n >= 0 { n as u32 } else {
1114                                    polars_bail!(SQLInterface: "ROUND does not currently support negative decimals value ({})", args[1])
1115                                }
1116                            },
1117                            _ => polars_bail!(SQLSyntax: "invalid value for ROUND decimals ({})", args[1]),
1118                        }, RoundMode::default()))
1119                    }),
1120                    _ => polars_bail!(SQLSyntax: "ROUND expects 1-2 arguments (found {})", args.len()),
1121                }
1122            },
1123            Sign => self.visit_unary(Expr::sign),
1124            Sqrt => self.visit_unary(Expr::sqrt),
1125
1126            // ----
1127            // Trig functions
1128            // ----
1129            Acos => self.visit_unary(Expr::arccos),
1130            AcosD => self.visit_unary(|e| e.arccos().degrees()),
1131            Asin => self.visit_unary(Expr::arcsin),
1132            AsinD => self.visit_unary(|e| e.arcsin().degrees()),
1133            Atan => self.visit_unary(Expr::arctan),
1134            Atan2 => self.visit_binary(Expr::arctan2),
1135            Atan2D => self.visit_binary(|e, s| e.arctan2(s).degrees()),
1136            AtanD => self.visit_unary(|e| e.arctan().degrees()),
1137            Cos => self.visit_unary(Expr::cos),
1138            CosD => self.visit_unary(|e| e.radians().cos()),
1139            Cot => self.visit_unary(Expr::cot),
1140            CotD => self.visit_unary(|e| e.radians().cot()),
1141            Degrees => self.visit_unary(Expr::degrees),
1142            Radians => self.visit_unary(Expr::radians),
1143            Sin => self.visit_unary(Expr::sin),
1144            SinD => self.visit_unary(|e| e.radians().sin()),
1145            Tan => self.visit_unary(Expr::tan),
1146            TanD => self.visit_unary(|e| e.radians().tan()),
1147
1148            // ----
1149            // Conditional functions
1150            // ----
1151            Coalesce => self.visit_variadic(coalesce),
1152            Greatest => self.visit_variadic(|exprs: &[Expr]| max_horizontal(exprs).unwrap()),
1153            If => {
1154                let args = extract_args(function)?;
1155                match args.len() {
1156                    3 => self.try_visit_ternary(|cond: Expr, expr1: Expr, expr2: Expr| {
1157                        Ok(when(cond).then(expr1).otherwise(expr2))
1158                    }),
1159                    _ => {
1160                        polars_bail!(SQLSyntax: "IF expects 3 arguments (found {})", args.len()
1161                        )
1162                    },
1163                }
1164            },
1165            IfNull => {
1166                let args = extract_args(function)?;
1167                match args.len() {
1168                    2 => self.visit_variadic(coalesce),
1169                    _ => {
1170                        polars_bail!(SQLSyntax: "IFNULL expects 2 arguments (found {})", args.len())
1171                    },
1172                }
1173            },
1174            Least => self.visit_variadic(|exprs: &[Expr]| min_horizontal(exprs).unwrap()),
1175            NullIf => {
1176                let args = extract_args(function)?;
1177                match args.len() {
1178                    2 => self.visit_binary(|l: Expr, r: Expr| {
1179                        when(l.clone().eq(r))
1180                            .then(lit(LiteralValue::untyped_null()))
1181                            .otherwise(l)
1182                    }),
1183                    _ => {
1184                        polars_bail!(SQLSyntax: "NULLIF expects 2 arguments (found {})", args.len())
1185                    },
1186                }
1187            },
1188
1189            // ----
1190            // Date functions
1191            // ----
1192            DatePart => self.try_visit_binary(|part, e| {
1193                match part {
1194                    Expr::Literal(p) if p.extract_str().is_some() => {
1195                        let p = p.extract_str().unwrap();
1196                        // note: 'DATE_PART' and 'EXTRACT' are minor syntactic
1197                        // variations on otherwise identical functionality
1198                        parse_extract_date_part(
1199                            e,
1200                            &DateTimeField::Custom(Ident {
1201                                value: p.to_string(),
1202                                quote_style: None,
1203                                span: Span::empty(),
1204                            }),
1205                        )
1206                    },
1207                    _ => {
1208                        polars_bail!(SQLSyntax: "invalid 'part' for EXTRACT/DATE_PART ({})", part);
1209                    },
1210                }
1211            }),
1212            Strftime => {
1213                let args = extract_args(function)?;
1214                match args.len() {
1215                    2 => self.visit_binary(|e, fmt: String| e.dt().strftime(fmt.as_str())),
1216                    _ => {
1217                        polars_bail!(SQLSyntax: "STRFTIME expects 2 arguments (found {})", args.len())
1218                    },
1219                }
1220            },
1221
1222            // ----
1223            // String functions
1224            // ----
1225            BitLength => self.visit_unary(|e| e.str().len_bytes() * lit(8)),
1226            Concat => {
1227                let args = extract_args(function)?;
1228                if args.is_empty() {
1229                    polars_bail!(SQLSyntax: "CONCAT expects at least 1 argument (found 0)");
1230                } else {
1231                    self.visit_variadic(|exprs: &[Expr]| concat_str(exprs, "", true))
1232                }
1233            },
1234            ConcatWS => {
1235                let args = extract_args(function)?;
1236                if args.len() < 2 {
1237                    polars_bail!(SQLSyntax: "CONCAT_WS expects at least 2 arguments (found {})", args.len());
1238                } else {
1239                    self.try_visit_variadic(|exprs: &[Expr]| {
1240                        match &exprs[0] {
1241                            Expr::Literal(lv) if lv.extract_str().is_some() => Ok(concat_str(&exprs[1..], lv.extract_str().unwrap(), true)),
1242                            _ => polars_bail!(SQLSyntax: "CONCAT_WS 'separator' must be a literal string (found {:?})", exprs[0]),
1243                        }
1244                    })
1245                }
1246            },
1247            Date => {
1248                let args = extract_args(function)?;
1249                match args.len() {
1250                    1 => self.visit_unary(|e| e.str().to_date(StrptimeOptions::default())),
1251                    2 => self.visit_binary(|e, fmt| e.str().to_date(fmt)),
1252                    _ => {
1253                        polars_bail!(SQLSyntax: "DATE expects 1-2 arguments (found {})", args.len())
1254                    },
1255                }
1256            },
1257            EndsWith => self.visit_binary(|e, s| e.str().ends_with(s)),
1258            #[cfg(feature = "nightly")]
1259            InitCap => self.visit_unary(|e| e.str().to_titlecase()),
1260            Left => self.try_visit_binary(|e, length| {
1261                Ok(match length {
1262                    Expr::Literal(lv) if lv.is_null() => lit(lv),
1263                    Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(0))) => lit(""),
1264                    Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) => {
1265                        let len = if n > 0 {
1266                            lit(n)
1267                        } else {
1268                            (e.clone().str().len_chars() + lit(n)).clip_min(lit(0))
1269                        };
1270                        e.str().slice(lit(0), len)
1271                    },
1272                    Expr::Literal(v) => {
1273                        polars_bail!(SQLSyntax: "invalid 'n_chars' for LEFT ({:?})", v)
1274                    },
1275                    _ => when(length.clone().gt_eq(lit(0)))
1276                        .then(e.clone().str().slice(lit(0), length.clone().abs()))
1277                        .otherwise(e.clone().str().slice(
1278                            lit(0),
1279                            (e.str().len_chars() + length.clone()).clip_min(lit(0)),
1280                        )),
1281                })
1282            }),
1283            Length => self.visit_unary(|e| e.str().len_chars()),
1284            Lower => self.visit_unary(|e| e.str().to_lowercase()),
1285            LTrim => {
1286                let args = extract_args(function)?;
1287                match args.len() {
1288                    1 => self.visit_unary(|e| {
1289                        e.str().strip_chars_start(lit(LiteralValue::untyped_null()))
1290                    }),
1291                    2 => self.visit_binary(|e, s| e.str().strip_chars_start(s)),
1292                    _ => {
1293                        polars_bail!(SQLSyntax: "LTRIM expects 1-2 arguments (found {})", args.len())
1294                    },
1295                }
1296            },
1297            Normalize => {
1298                let args = extract_args(function)?;
1299                match args.len() {
1300                    1 => self.visit_unary(|e| e.str().normalize(UnicodeForm::NFC)),
1301                    2 => {
1302                        let form = if let FunctionArgExpr::Expr(SQLExpr::Identifier(Ident {
1303                            value: s,
1304                            quote_style: None,
1305                            span: _,
1306                        })) = args[1]
1307                        {
1308                            match s.to_uppercase().as_str() {
1309                                "NFC" => UnicodeForm::NFC,
1310                                "NFD" => UnicodeForm::NFD,
1311                                "NFKC" => UnicodeForm::NFKC,
1312                                "NFKD" => UnicodeForm::NFKD,
1313                                _ => {
1314                                    polars_bail!(SQLSyntax: "invalid 'form' for NORMALIZE (found {})", s)
1315                                },
1316                            }
1317                        } else {
1318                            polars_bail!(SQLSyntax: "invalid 'form' for NORMALIZE (found {})", args[1])
1319                        };
1320                        self.try_visit_binary(|e, _form: Expr| Ok(e.str().normalize(form.clone())))
1321                    },
1322                    _ => {
1323                        polars_bail!(SQLSyntax: "NORMALIZE expects 1-2 arguments (found {})", args.len())
1324                    },
1325                }
1326            },
1327            OctetLength => self.visit_unary(|e| e.str().len_bytes()),
1328            StrPos => {
1329                // note: SQL is 1-indexed; returns zero if no match found
1330                self.visit_binary(|expr, substring| {
1331                    (expr.str().find(substring, true) + typed_lit(1u32)).fill_null(typed_lit(0u32))
1332                })
1333            },
1334            RegexpLike => {
1335                let args = extract_args(function)?;
1336                match args.len() {
1337                    2 => self.visit_binary(|e, s| e.str().contains(s, true)),
1338                    3 => self.try_visit_ternary(|e, pat, flags| {
1339                        Ok(e.str().contains(
1340                            match (pat, flags) {
1341                                (Expr::Literal(s_lv), Expr::Literal(f_lv)) if s_lv.extract_str().is_some() && f_lv.extract_str().is_some() => {
1342                                    let s = s_lv.extract_str().unwrap();
1343                                    let f = f_lv.extract_str().unwrap();
1344                                    if f.is_empty() {
1345                                        polars_bail!(SQLSyntax: "invalid/empty 'flags' for REGEXP_LIKE ({})", args[2]);
1346                                    };
1347                                    lit(format!("(?{f}){s}"))
1348                                },
1349                                _ => {
1350                                    polars_bail!(SQLSyntax: "invalid arguments for REGEXP_LIKE ({}, {})", args[1], args[2]);
1351                                },
1352                            },
1353                            true))
1354                    }),
1355                    _ => polars_bail!(SQLSyntax: "REGEXP_LIKE expects 2-3 arguments (found {})",args.len()),
1356                }
1357            },
1358            Replace => {
1359                let args = extract_args(function)?;
1360                match args.len() {
1361                    3 => self
1362                        .try_visit_ternary(|e, old, new| Ok(e.str().replace_all(old, new, true))),
1363                    _ => {
1364                        polars_bail!(SQLSyntax: "REPLACE expects 3 arguments (found {})", args.len())
1365                    },
1366                }
1367            },
1368            Reverse => self.visit_unary(|e| e.str().reverse()),
1369            Right => self.try_visit_binary(|e, length| {
1370                Ok(match length {
1371                    Expr::Literal(lv) if lv.is_null() => lit(lv),
1372                    Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(0))) => typed_lit(""),
1373                    Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) => {
1374                        let n: i64 = n.try_into().unwrap();
1375                        let offset = if n < 0 {
1376                            lit(n.abs())
1377                        } else {
1378                            e.clone().str().len_chars().cast(DataType::Int32) - lit(n)
1379                        };
1380                        e.str().slice(offset, lit(LiteralValue::untyped_null()))
1381                    },
1382                    Expr::Literal(v) => {
1383                        polars_bail!(SQLSyntax: "invalid 'n_chars' for RIGHT ({:?})", v)
1384                    },
1385                    _ => when(length.clone().lt(lit(0)))
1386                        .then(
1387                            e.clone()
1388                                .str()
1389                                .slice(length.clone().abs(), lit(LiteralValue::untyped_null())),
1390                        )
1391                        .otherwise(e.clone().str().slice(
1392                            e.str().len_chars().cast(DataType::Int32) - length.clone(),
1393                            lit(LiteralValue::untyped_null()),
1394                        )),
1395                })
1396            }),
1397            RTrim => {
1398                let args = extract_args(function)?;
1399                match args.len() {
1400                    1 => self.visit_unary(|e| {
1401                        e.str().strip_chars_end(lit(LiteralValue::untyped_null()))
1402                    }),
1403                    2 => self.visit_binary(|e, s| e.str().strip_chars_end(s)),
1404                    _ => {
1405                        polars_bail!(SQLSyntax: "RTRIM expects 1-2 arguments (found {})", args.len())
1406                    },
1407                }
1408            },
1409            SplitPart => {
1410                let args = extract_args(function)?;
1411                match args.len() {
1412                    3 => self.try_visit_ternary(|e, sep, idx| {
1413                        let idx = adjust_one_indexed_param(idx, true);
1414                        Ok(when(e.clone().is_not_null())
1415                            .then(
1416                                e.clone()
1417                                    .str()
1418                                    .split(sep)
1419                                    .list()
1420                                    .get(idx, true)
1421                                    .fill_null(lit("")),
1422                            )
1423                            .otherwise(e))
1424                    }),
1425                    _ => {
1426                        polars_bail!(SQLSyntax: "SPLIT_PART expects 3 arguments (found {})", args.len())
1427                    },
1428                }
1429            },
1430            StartsWith => self.visit_binary(|e, s| e.str().starts_with(s)),
1431            StringToArray => {
1432                let args = extract_args(function)?;
1433                match args.len() {
1434                    2 => self.visit_binary(|e, sep| e.str().split(sep)),
1435                    _ => {
1436                        polars_bail!(SQLSyntax: "STRING_TO_ARRAY expects 2 arguments (found {})", args.len())
1437                    },
1438                }
1439            },
1440            Strptime => {
1441                let args = extract_args(function)?;
1442                match args.len() {
1443                    2 => self.visit_binary(|e, fmt: String| {
1444                        e.str().strptime(
1445                            DataType::Datetime(TimeUnit::Microseconds, None),
1446                            StrptimeOptions {
1447                                format: Some(fmt.into()),
1448                                ..Default::default()
1449                            },
1450                            lit("latest"),
1451                        )
1452                    }),
1453                    _ => {
1454                        polars_bail!(SQLSyntax: "STRPTIME expects 2 arguments (found {})", args.len())
1455                    },
1456                }
1457            },
1458            Time => {
1459                let args = extract_args(function)?;
1460                match args.len() {
1461                    1 => self.visit_unary(|e| e.str().to_time(StrptimeOptions::default())),
1462                    2 => self.visit_binary(|e, fmt| e.str().to_time(fmt)),
1463                    _ => {
1464                        polars_bail!(SQLSyntax: "TIME expects 1-2 arguments (found {})", args.len())
1465                    },
1466                }
1467            },
1468            Timestamp => {
1469                let args = extract_args(function)?;
1470                match args.len() {
1471                    1 => self.visit_unary(|e| {
1472                        e.str()
1473                            .to_datetime(None, None, StrptimeOptions::default(), lit("latest"))
1474                    }),
1475                    2 => self
1476                        .visit_binary(|e, fmt| e.str().to_datetime(None, None, fmt, lit("latest"))),
1477                    _ => {
1478                        polars_bail!(SQLSyntax: "DATETIME expects 1-2 arguments (found {})", args.len())
1479                    },
1480                }
1481            },
1482            Substring => {
1483                let args = extract_args(function)?;
1484                match args.len() {
1485                    // note: SQL is 1-indexed, hence the need for adjustments
1486                    2 => self.try_visit_binary(|e, start| {
1487                        Ok(match start {
1488                            Expr::Literal(lv) if lv.is_null() => lit(lv),
1489                            Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) if n <= 0 => e,
1490                            Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) => e.str().slice(lit(n - 1), lit(LiteralValue::untyped_null())),
1491                            Expr::Literal(_) => polars_bail!(SQLSyntax: "invalid 'start' for SUBSTR ({})", args[1]),
1492                            _ => start.clone() + lit(1),
1493                        })
1494                    }),
1495                    3 => self.try_visit_ternary(|e: Expr, start: Expr, length: Expr| {
1496                        Ok(match (start.clone(), length.clone()) {
1497                            (Expr::Literal(lv), _) | (_, Expr::Literal(lv)) if lv.is_null() => lit(lv),
1498                            (_, Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n)))) if n < 0 => {
1499                                polars_bail!(SQLSyntax: "SUBSTR does not support negative length ({})", args[2])
1500                            },
1501                            (Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))), _) if n > 0 => e.str().slice(lit(n - 1), length),
1502                            (Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))), _) => {
1503                                e.str().slice(lit(0), (length + lit(n - 1)).clip_min(lit(0)))
1504                            },
1505                            (Expr::Literal(_), _) => polars_bail!(SQLSyntax: "invalid 'start' for SUBSTR ({})", args[1]),
1506                            (_, Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Float(_)))) => {
1507                                polars_bail!(SQLSyntax: "invalid 'length' for SUBSTR ({})", args[1])
1508                            },
1509                            _ => {
1510                                let adjusted_start = start - lit(1);
1511                                when(adjusted_start.clone().lt(lit(0)))
1512                                    .then(e.clone().str().slice(lit(0), (length.clone() + adjusted_start.clone()).clip_min(lit(0))))
1513                                    .otherwise(e.str().slice(adjusted_start, length))
1514                            }
1515                        })
1516                    }),
1517                    _ => polars_bail!(SQLSyntax: "SUBSTR expects 2-3 arguments (found {})", args.len()),
1518                }
1519            },
1520            Upper => self.visit_unary(|e| e.str().to_uppercase()),
1521
1522            // ----
1523            // Aggregate functions
1524            // ----
1525            Avg => self.visit_unary(Expr::mean),
1526            Corr => self.visit_binary(polars_lazy::dsl::pearson_corr),
1527            Count => self.visit_count(),
1528            CovarPop => self.visit_binary(|a, b| polars_lazy::dsl::cov(a, b, 0)),
1529            CovarSamp => self.visit_binary(|a, b| polars_lazy::dsl::cov(a, b, 1)),
1530            First => self.visit_unary(Expr::first),
1531            Last => self.visit_unary(Expr::last),
1532            Max => self.visit_unary_with_opt_cumulative(Expr::max, Expr::cum_max),
1533            Median => self.visit_unary(Expr::median),
1534            QuantileCont => {
1535                let args = extract_args(function)?;
1536                match args.len() {
1537                    2 => self.try_visit_binary(|e, q| {
1538                        let value = match q {
1539                            Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Float(f))) => {
1540                                if (0.0..=1.0).contains(&f) {
1541                                    Expr::from(f)
1542                                } else {
1543                                    polars_bail!(SQLSyntax: "QUANTILE_CONT value must be between 0 and 1 ({})", args[1])
1544                                }
1545                            },
1546                            Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) => {
1547                                if (0..=1).contains(&n) {
1548                                    Expr::from(n as f64)
1549                                } else {
1550                                    polars_bail!(SQLSyntax: "QUANTILE_CONT value must be between 0 and 1 ({})", args[1])
1551                                }
1552                            },
1553                            _ => polars_bail!(SQLSyntax: "invalid value for QUANTILE_CONT ({})", args[1])
1554                        };
1555                        Ok(e.quantile(value, QuantileMethod::Linear))
1556                    }),
1557                    _ => polars_bail!(SQLSyntax: "QUANTILE_CONT expects 2 arguments (found {})", args.len()),
1558                }
1559            },
1560            QuantileDisc => {
1561                let args = extract_args(function)?;
1562                match args.len() {
1563                    2 => self.try_visit_binary(|e, q| {
1564                        let value = match q {
1565                            Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Float(f))) => {
1566                                if (0.0..=1.0).contains(&f) {
1567                                    Expr::from(f)
1568                                } else {
1569                                    polars_bail!(SQLSyntax: "QUANTILE_DISC value must be between 0 and 1 ({})", args[1])
1570                                }
1571                            },
1572                            Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) => {
1573                                if (0..=1).contains(&n) {
1574                                    Expr::from(n as f64)
1575                                } else {
1576                                    polars_bail!(SQLSyntax: "QUANTILE_DISC value must be between 0 and 1 ({})", args[1])
1577                                }
1578                            },
1579                            _ => polars_bail!(SQLSyntax: "invalid value for QUANTILE_DISC ({})", args[1])
1580                        };
1581                        Ok(e.quantile(value, QuantileMethod::Equiprobable))
1582                    }),
1583                    _ => polars_bail!(SQLSyntax: "QUANTILE_DISC expects 2 arguments (found {})", args.len()),
1584                }
1585            },
1586            Min => self.visit_unary_with_opt_cumulative(Expr::min, Expr::cum_min),
1587            StdDev => self.visit_unary(|e| e.std(1)),
1588            Sum => self.visit_unary_with_opt_cumulative(Expr::sum, Expr::cum_sum),
1589            Variance => self.visit_unary(|e| e.var(1)),
1590
1591            // ----
1592            // Array functions
1593            // ----
1594            ArrayAgg => self.visit_arr_agg(),
1595            ArrayContains => self.visit_binary::<Expr>(|e, s| e.list().contains(s, true)),
1596            ArrayGet => {
1597                // note: SQL is 1-indexed, not 0-indexed
1598                self.visit_binary(|e, idx: Expr| {
1599                    let idx = adjust_one_indexed_param(idx, true);
1600                    e.list().get(idx, true)
1601                })
1602            },
1603            ArrayLength => self.visit_unary(|e| e.list().len()),
1604            ArrayMax => self.visit_unary(|e| e.list().max()),
1605            ArrayMean => self.visit_unary(|e| e.list().mean()),
1606            ArrayMin => self.visit_unary(|e| e.list().min()),
1607            ArrayReverse => self.visit_unary(|e| e.list().reverse()),
1608            ArraySum => self.visit_unary(|e| e.list().sum()),
1609            ArrayToString => self.visit_arr_to_string(),
1610            ArrayUnique => self.visit_unary(|e| e.list().unique_stable()),
1611            Explode => self.visit_unary(|e| {
1612                e.explode(ExplodeOptions {
1613                    empty_as_null: true,
1614                    keep_nulls: true,
1615                })
1616            }),
1617
1618            // ----
1619            // Column selection
1620            // ----
1621            Columns => {
1622                let active_schema = self.active_schema;
1623                self.try_visit_unary(|e: Expr| match e {
1624                    Expr::Literal(lv) if lv.extract_str().is_some() => {
1625                        let pat = lv.extract_str().unwrap();
1626                        if pat == "*" {
1627                            polars_bail!(
1628                                SQLSyntax: "COLUMNS('*') is not a valid regex; \
1629                                did you mean COLUMNS(*)?"
1630                            )
1631                        };
1632                        let pat = match pat {
1633                            _ if pat.starts_with('^') && pat.ends_with('$') => pat.to_string(),
1634                            _ if pat.starts_with('^') => format!("{pat}.*$"),
1635                            _ if pat.ends_with('$') => format!("^.*{pat}"),
1636                            _ => format!("^.*{pat}.*$"),
1637                        };
1638                        if let Some(active_schema) = &active_schema {
1639                            let rx = polars_utils::regex_cache::compile_regex(&pat).unwrap();
1640                            let col_names = active_schema
1641                                .iter_names()
1642                                .filter(|name| rx.is_match(name))
1643                                .cloned()
1644                                .collect::<Vec<_>>();
1645
1646                            Ok(if col_names.len() == 1 {
1647                                col(col_names.into_iter().next().unwrap())
1648                            } else {
1649                                cols(col_names).as_expr()
1650                            })
1651                        } else {
1652                            Ok(col(pat.as_str()))
1653                        }
1654                    },
1655                    Expr::Selector(s) => Ok(s.as_expr()),
1656                    _ => polars_bail!(SQLSyntax: "COLUMNS expects a regex; found {:?}", e),
1657                })
1658            },
1659
1660            // ----
1661            // Window functions
1662            // ----
1663            FirstValue => self.visit_unary(Expr::first),
1664            LastValue => {
1665                // With the default window frame (ROWS UNBOUNDED PRECEDING TO CURRENT ROW),
1666                // LAST_VALUE returns the last value from the start of the partition up
1667                // to the current row - which is simply the current row's value.
1668                let args = extract_args(function)?;
1669                match args.as_slice() {
1670                    [FunctionArgExpr::Expr(sql_expr)] => {
1671                        parse_sql_expr(sql_expr, self.ctx, self.active_schema)
1672                    },
1673                    _ => polars_bail!(
1674                        SQLSyntax: "LAST_VALUE expects exactly 1 argument (found {})",
1675                        args.len()
1676                    ),
1677                }
1678            },
1679            Lag => self.visit_window_offset_function(1),
1680            Lead => self.visit_window_offset_function(-1),
1681            #[cfg(feature = "rank")]
1682            Rank | DenseRank => {
1683                let (func_name, rank_method) = match function_name {
1684                    Rank => ("RANK", RankMethod::Min),
1685                    DenseRank => ("DENSE_RANK", RankMethod::Dense),
1686                    _ => unreachable!(),
1687                };
1688                let args = extract_args(function)?;
1689                if !args.is_empty() {
1690                    polars_bail!(SQLSyntax: "{} expects 0 arguments (found {})", func_name, args.len());
1691                }
1692                let window_spec = match &self.func.over {
1693                    Some(WindowType::WindowSpec(spec)) if !spec.order_by.is_empty() => spec,
1694                    _ => {
1695                        polars_bail!(SQLSyntax: "{} requires an OVER clause with ORDER BY", func_name)
1696                    },
1697                };
1698                let (order_exprs, all_desc) =
1699                    self.parse_order_by_in_window(&window_spec.order_by)?;
1700                let rank_expr = if order_exprs.len() == 1 {
1701                    order_exprs[0].clone().rank(
1702                        RankOptions {
1703                            method: rank_method,
1704                            descending: all_desc,
1705                        },
1706                        None,
1707                    )
1708                } else {
1709                    as_struct(order_exprs).rank(
1710                        RankOptions {
1711                            method: rank_method,
1712                            descending: all_desc,
1713                        },
1714                        None,
1715                    )
1716                };
1717                self.apply_window_spec(rank_expr, &self.func.over)
1718            },
1719            RowNumber => {
1720                let args = extract_args(function)?;
1721                if !args.is_empty() {
1722                    polars_bail!(SQLSyntax: "ROW_NUMBER expects 0 arguments (found {})", args.len());
1723                }
1724                // note: SQL is 1-indexed
1725                let row_num_expr = int_range(lit(0i64), len(), 1, DataType::UInt32) + lit(1u32);
1726                self.apply_window_spec(row_num_expr, &self.func.over)
1727            },
1728
1729            // ----
1730            // User-defined
1731            // ----
1732            Udf(func_name) => self.visit_udf(&func_name),
1733        }
1734    }
1735
1736    fn visit_window_offset_function(&mut self, offset_multiplier: i64) -> PolarsResult<Expr> {
1737        // LAG/LEAD require an OVER clause
1738        if self.func.over.is_none() {
1739            polars_bail!(SQLSyntax: "{} requires an OVER clause", self.func.name);
1740        }
1741
1742        // LAG/LEAD require ORDER BY in the OVER clause
1743        let window_type = self.func.over.as_ref().unwrap();
1744        let window_spec = self.resolve_window_spec(window_type)?;
1745        if window_spec.order_by.is_empty() {
1746            polars_bail!(SQLSyntax: "{} requires an ORDER BY in the OVER clause", self.func.name);
1747        }
1748
1749        let args = extract_args(self.func)?;
1750
1751        match args.as_slice() {
1752            [FunctionArgExpr::Expr(sql_expr)] => {
1753                let expr = parse_sql_expr(sql_expr, self.ctx, self.active_schema)?;
1754                Ok(expr.shift(offset_multiplier.into()))
1755            },
1756            [FunctionArgExpr::Expr(sql_expr), FunctionArgExpr::Expr(offset_expr)] => {
1757                let expr = parse_sql_expr(sql_expr, self.ctx, self.active_schema)?;
1758                let offset = parse_sql_expr(offset_expr, self.ctx, self.active_schema)?;
1759                if let Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) = offset {
1760                    if n <= 0 {
1761                        polars_bail!(SQLSyntax: "offset must be positive (found {})", n)
1762                    }
1763                    Ok(expr.shift((offset_multiplier * n as i64).into()))
1764                } else {
1765                    polars_bail!(SQLSyntax: "offset must be an integer (found {:?})", offset)
1766                }
1767            },
1768            _ => polars_bail!(SQLSyntax: "{} expects 1 or 2 arguments (found {})", self.func.name, args.len()),
1769        }.and_then(|e| self.apply_window_spec(e, &self.func.over))
1770    }
1771
1772    fn visit_udf(&mut self, func_name: &str) -> PolarsResult<Expr> {
1773        let args = extract_args(self.func)?
1774            .into_iter()
1775            .map(|arg| {
1776                if let FunctionArgExpr::Expr(e) = arg {
1777                    parse_sql_expr(e, self.ctx, self.active_schema)
1778                } else {
1779                    polars_bail!(SQLInterface: "only expressions are supported in UDFs")
1780                }
1781            })
1782            .collect::<PolarsResult<Vec<_>>>()?;
1783
1784        Ok(self
1785            .ctx
1786            .function_registry
1787            .get_udf(func_name)?
1788            .ok_or_else(|| polars_err!(SQLInterface: "UDF {} not found", func_name))?
1789            .call(args))
1790    }
1791
1792    /// Validate window frame specifications.
1793    ///
1794    /// Polars only supports ROWS frame semantics, and does
1795    /// not currently support customising the window.
1796    ///
1797    /// **Supported Frame Spec**
1798    /// - `ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`
1799    ///
1800    /// **Unsupported Frame Spec**
1801    /// - `RANGE ...` (peer group semantics not implemented)
1802    /// - `GROUPS ...` (peer group semantics not implemented)
1803    /// - `ROWS` with other bounds (e.g., `<n> PRECEDING`, `FOLLOWING`, etc)
1804    fn validate_window_frame(&self, window_frame: &Option<WindowFrame>) -> PolarsResult<()> {
1805        if let Some(frame) = window_frame {
1806            match frame.units {
1807                WindowFrameUnits::Range => {
1808                    polars_bail!(
1809                        SQLInterface:
1810                        "RANGE-based window frames are not supported"
1811                    );
1812                },
1813                WindowFrameUnits::Groups => {
1814                    polars_bail!(
1815                        SQLInterface:
1816                        "GROUPS-based window frames are not supported"
1817                    );
1818                },
1819                WindowFrameUnits::Rows => {
1820                    if !matches!(
1821                        (&frame.start_bound, &frame.end_bound),
1822                        (
1823                            WindowFrameBound::Preceding(None),         // UNBOUNDED PRECEDING
1824                            None | Some(WindowFrameBound::CurrentRow)  // CURRENT ROW
1825                        )
1826                    ) {
1827                        polars_bail!(
1828                            SQLInterface:
1829                            "only 'ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW' is currently supported; found 'ROWS BETWEEN {} AND {}'",
1830                            frame.start_bound,
1831                            frame.end_bound.as_ref().map_or("CURRENT ROW", |b| {
1832                                match b {
1833                                    WindowFrameBound::CurrentRow => "CURRENT ROW",
1834                                    WindowFrameBound::Preceding(_) => "N PRECEDING",
1835                                    WindowFrameBound::Following(_) => "N FOLLOWING",
1836                                }
1837                            })
1838                        );
1839                    }
1840                },
1841            }
1842        }
1843        Ok(())
1844    }
1845
1846    /// Window specs that map to cumulative functions.
1847    ///
1848    /// Converts SQL window functions with ORDER BY to compatible cumulative ops:
1849    /// - `SUM(a) OVER (ORDER BY b)` → `a.cum_sum().over(order_by=b)`
1850    /// - `MAX(a) OVER (ORDER BY b)` → `a.cum_max().over(order_by=b)`
1851    /// - `MIN(a) OVER (ORDER BY b)` → `a.cum_min().over(order_by=b)`
1852    ///
1853    /// ROWS vs RANGE Semantics (show default behaviour if no frame spec):
1854    ///
1855    /// **Polars (ROWS)**
1856    /// Each row gets its own cumulative value row-by-row.
1857    /// ```text
1858    /// Data: [(A,X,10), (A,X,15), (A,Y,20)]
1859    /// Query: SUM(value) OVER (ORDER BY category, subcategory)
1860    /// Result: [10, 25, 45]  ← row-by-row cumulative
1861    /// ```
1862    ///
1863    /// **SQL (RANGE)**
1864    /// Rows with identical ORDER BY values (peers) get the same result.
1865    /// ```text
1866    /// Same data, query with RANGE (eg: using a relational DB):
1867    /// Result: [25, 25, 45]  ← both (A,X) rows get 25
1868    /// ```
1869    fn apply_cumulative_window(
1870        &mut self,
1871        f: impl Fn(Expr) -> Expr,
1872        cumulative_fn: impl Fn(Expr, bool) -> Expr,
1873        WindowSpec {
1874            partition_by,
1875            order_by,
1876            window_frame,
1877            ..
1878        }: &WindowSpec,
1879    ) -> PolarsResult<Expr> {
1880        self.validate_window_frame(window_frame)?;
1881
1882        if !order_by.is_empty() {
1883            // Extract ORDER BY exprs and sort direction
1884            let (order_by_exprs, all_desc) = self.parse_order_by_in_window(order_by)?;
1885
1886            // Get the base expr/column
1887            let args = extract_args(self.func)?;
1888            let base_expr = match args.as_slice() {
1889                [FunctionArgExpr::Expr(sql_expr)] => {
1890                    parse_sql_expr(sql_expr, self.ctx, self.active_schema)?
1891                },
1892                _ => return self.not_supported_error(),
1893            };
1894            let partition_by_exprs = if partition_by.is_empty() {
1895                None
1896            } else {
1897                Some(
1898                    partition_by
1899                        .iter()
1900                        .map(|p| parse_sql_expr(p, self.ctx, self.active_schema))
1901                        .collect::<PolarsResult<Vec<_>>>()?,
1902                )
1903            };
1904
1905            // Apply cumulative function and wrap with window spec
1906            let cumulative_expr = cumulative_fn(base_expr, false);
1907            let sort_opts = SortOptions::default().with_order_descending(all_desc);
1908            cumulative_expr.over_with_options(
1909                partition_by_exprs,
1910                Some((order_by_exprs, sort_opts)),
1911                Default::default(),
1912            )
1913        } else {
1914            self.visit_unary(f)
1915        }
1916    }
1917
1918    fn visit_unary(&mut self, f: impl Fn(Expr) -> Expr) -> PolarsResult<Expr> {
1919        self.try_visit_unary(|e| Ok(f(e)))
1920    }
1921
1922    fn try_visit_unary(&mut self, f: impl Fn(Expr) -> PolarsResult<Expr>) -> PolarsResult<Expr> {
1923        let args = extract_args(self.func)?;
1924        match args.as_slice() {
1925            [FunctionArgExpr::Expr(sql_expr)] => {
1926                f(parse_sql_expr(sql_expr, self.ctx, self.active_schema)?)
1927            },
1928            [FunctionArgExpr::Wildcard] => f(parse_sql_expr(
1929                &SQLExpr::Wildcard(AttachedToken::empty()),
1930                self.ctx,
1931                self.active_schema,
1932            )?),
1933            _ => self.not_supported_error(),
1934        }
1935        .and_then(|e| self.apply_window_spec(e, &self.func.over))
1936    }
1937
1938    /// Resolve a WindowType to a concrete WindowSpec (handles named window references)
1939    fn resolve_window_spec(&self, window_type: &WindowType) -> PolarsResult<WindowSpec> {
1940        match window_type {
1941            WindowType::WindowSpec(spec) => Ok(spec.clone()),
1942            WindowType::NamedWindow(name) => self
1943                .ctx
1944                .named_windows
1945                .get(&name.value)
1946                .cloned()
1947                .ok_or_else(|| {
1948                    polars_err!(
1949                        SQLInterface:
1950                        "named window '{}' was not found",
1951                        name.value
1952                    )
1953                }),
1954        }
1955    }
1956
1957    /// Some functions have cumulative equivalents that can be applied to window specs
1958    /// e.g. SUM(a) OVER (ORDER BY b DESC) -> CUMSUM(a, false)
1959    fn visit_unary_with_opt_cumulative(
1960        &mut self,
1961        f: impl Fn(Expr) -> Expr,
1962        cumulative_fn: impl Fn(Expr, bool) -> Expr,
1963    ) -> PolarsResult<Expr> {
1964        match self.func.over.as_ref() {
1965            Some(window_type) => {
1966                let spec = self.resolve_window_spec(window_type)?;
1967                self.apply_cumulative_window(f, cumulative_fn, &spec)
1968            },
1969            None => self.visit_unary(f),
1970        }
1971    }
1972
1973    fn visit_binary<Arg: FromSQLExpr>(
1974        &mut self,
1975        f: impl Fn(Expr, Arg) -> Expr,
1976    ) -> PolarsResult<Expr> {
1977        self.try_visit_binary(|e, a| Ok(f(e, a)))
1978    }
1979
1980    fn try_visit_binary<Arg: FromSQLExpr>(
1981        &mut self,
1982        f: impl Fn(Expr, Arg) -> PolarsResult<Expr>,
1983    ) -> PolarsResult<Expr> {
1984        let args = extract_args(self.func)?;
1985        match args.as_slice() {
1986            [
1987                FunctionArgExpr::Expr(sql_expr1),
1988                FunctionArgExpr::Expr(sql_expr2),
1989            ] => {
1990                let expr1 = parse_sql_expr(sql_expr1, self.ctx, self.active_schema)?;
1991                let expr2 = Arg::from_sql_expr(sql_expr2, self.ctx)?;
1992                f(expr1, expr2)
1993            },
1994            _ => self.not_supported_error(),
1995        }
1996    }
1997
1998    fn visit_variadic(&mut self, f: impl Fn(&[Expr]) -> Expr) -> PolarsResult<Expr> {
1999        self.try_visit_variadic(|e| Ok(f(e)))
2000    }
2001
2002    fn try_visit_variadic(
2003        &mut self,
2004        f: impl Fn(&[Expr]) -> PolarsResult<Expr>,
2005    ) -> PolarsResult<Expr> {
2006        let args = extract_args(self.func)?;
2007        let mut expr_args = vec![];
2008        for arg in args {
2009            if let FunctionArgExpr::Expr(sql_expr) = arg {
2010                expr_args.push(parse_sql_expr(sql_expr, self.ctx, self.active_schema)?);
2011            } else {
2012                return self.not_supported_error();
2013            };
2014        }
2015        f(&expr_args)
2016    }
2017
2018    fn try_visit_ternary<Arg: FromSQLExpr>(
2019        &mut self,
2020        f: impl Fn(Expr, Arg, Arg) -> PolarsResult<Expr>,
2021    ) -> PolarsResult<Expr> {
2022        let args = extract_args(self.func)?;
2023        match args.as_slice() {
2024            [
2025                FunctionArgExpr::Expr(sql_expr1),
2026                FunctionArgExpr::Expr(sql_expr2),
2027                FunctionArgExpr::Expr(sql_expr3),
2028            ] => {
2029                let expr1 = parse_sql_expr(sql_expr1, self.ctx, self.active_schema)?;
2030                let expr2 = Arg::from_sql_expr(sql_expr2, self.ctx)?;
2031                let expr3 = Arg::from_sql_expr(sql_expr3, self.ctx)?;
2032                f(expr1, expr2, expr3)
2033            },
2034            _ => self.not_supported_error(),
2035        }
2036    }
2037
2038    fn visit_nullary(&self, f: impl Fn() -> Expr) -> PolarsResult<Expr> {
2039        let args = extract_args(self.func)?;
2040        if !args.is_empty() {
2041            return self.not_supported_error();
2042        }
2043        Ok(f())
2044    }
2045
2046    fn visit_arr_agg(&mut self) -> PolarsResult<Expr> {
2047        let (args, is_distinct, clauses) = extract_args_and_clauses(self.func)?;
2048        match args.as_slice() {
2049            [FunctionArgExpr::Expr(sql_expr)] => {
2050                let mut base = parse_sql_expr(sql_expr, self.ctx, self.active_schema)?;
2051                let mut order_by_clause = None;
2052                let mut limit_clause = None;
2053                for clause in &clauses {
2054                    match clause {
2055                        FunctionArgumentClause::OrderBy(order_exprs) => {
2056                            order_by_clause = Some(order_exprs.as_slice());
2057                        },
2058                        FunctionArgumentClause::Limit(limit_expr) => {
2059                            limit_clause = Some(limit_expr);
2060                        },
2061                        _ => {},
2062                    }
2063                }
2064                if !is_distinct {
2065                    // No DISTINCT: apply ORDER BY normally
2066                    if let Some(order_by) = order_by_clause {
2067                        base = self.apply_order_by(base, order_by)?;
2068                    }
2069                } else {
2070                    // DISTINCT: apply unique, then sort the result
2071                    base = base.unique_stable();
2072                    if let Some(order_by) = order_by_clause {
2073                        base = self.apply_order_by_to_distinct_array(base, order_by, sql_expr)?;
2074                    }
2075                }
2076                if let Some(limit_expr) = limit_clause {
2077                    let limit = parse_sql_expr(limit_expr, self.ctx, self.active_schema)?;
2078                    match limit {
2079                        Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(n))) if n >= 0 => {
2080                            base = base.head(Some(n as usize))
2081                        },
2082                        _ => {
2083                            polars_bail!(SQLSyntax: "LIMIT in ARRAY_AGG must be a positive integer")
2084                        },
2085                    };
2086                }
2087                Ok(base.implode())
2088            },
2089            _ => {
2090                polars_bail!(SQLSyntax: "ARRAY_AGG must have exactly one argument; found {}", args.len())
2091            },
2092        }
2093    }
2094
2095    fn visit_arr_to_string(&mut self) -> PolarsResult<Expr> {
2096        let args = extract_args(self.func)?;
2097        match args.len() {
2098            2 => self.try_visit_binary(|e, sep| {
2099                Ok(e.cast(DataType::List(Box::from(DataType::String)))
2100                    .list()
2101                    .join(sep, true))
2102            }),
2103            #[cfg(feature = "list_eval")]
2104            3 => self.try_visit_ternary(|e, sep, null_value| match null_value {
2105                Expr::Literal(lv) if lv.extract_str().is_some() => {
2106                    Ok(if lv.extract_str().unwrap().is_empty() {
2107                        e.cast(DataType::List(Box::from(DataType::String)))
2108                            .list()
2109                            .join(sep, true)
2110                    } else {
2111                        e.cast(DataType::List(Box::from(DataType::String)))
2112                            .list()
2113                            .eval(element().fill_null(lit(lv.extract_str().unwrap())))
2114                            .list()
2115                            .join(sep, false)
2116                    })
2117                },
2118                _ => {
2119                    polars_bail!(SQLSyntax: "invalid null value for ARRAY_TO_STRING ({})", args[2])
2120                },
2121            }),
2122            _ => {
2123                polars_bail!(SQLSyntax: "ARRAY_TO_STRING expects 2-3 arguments (found {})", args.len())
2124            },
2125        }
2126    }
2127
2128    fn visit_count(&mut self) -> PolarsResult<Expr> {
2129        let (args, is_distinct) = extract_args_distinct(self.func)?;
2130
2131        // Window function with an ORDER BY clause?
2132        let has_order_by = match &self.func.over {
2133            Some(WindowType::WindowSpec(spec)) => !spec.order_by.is_empty(),
2134            _ => false,
2135        };
2136        if has_order_by && !is_distinct {
2137            if let Some(WindowType::WindowSpec(spec)) = &self.func.over {
2138                self.validate_window_frame(&spec.window_frame)?;
2139
2140                match args.as_slice() {
2141                    [FunctionArgExpr::Wildcard] | [] => {
2142                        // COUNT(*) with ORDER BY -> map to `int_range`
2143                        let (order_by_exprs, all_desc) =
2144                            self.parse_order_by_in_window(&spec.order_by)?;
2145                        let partition_by_exprs = if spec.partition_by.is_empty() {
2146                            None
2147                        } else {
2148                            Some(
2149                                spec.partition_by
2150                                    .iter()
2151                                    .map(|p| parse_sql_expr(p, self.ctx, self.active_schema))
2152                                    .collect::<PolarsResult<Vec<_>>>()?,
2153                            )
2154                        };
2155                        let sort_opts = SortOptions::default().with_order_descending(all_desc);
2156                        let row_number = int_range(lit(0), len(), 1, DataType::Int64).add(lit(1)); // SQL is 1-indexed
2157
2158                        return row_number.over_with_options(
2159                            partition_by_exprs,
2160                            Some((order_by_exprs, sort_opts)),
2161                            Default::default(),
2162                        );
2163                    },
2164                    [FunctionArgExpr::Expr(_)] => {
2165                        // COUNT(column) with ORDER BY -> use cum_count
2166                        return self.visit_unary_with_opt_cumulative(
2167                            |e| e.count(),
2168                            |e, reverse| e.cum_count(reverse),
2169                        );
2170                    },
2171                    _ => {},
2172                }
2173            }
2174        }
2175        let count_expr = match (is_distinct, args.as_slice()) {
2176            // COUNT(*), COUNT()
2177            (false, [FunctionArgExpr::Wildcard] | []) => len(),
2178            // COUNT(col)
2179            (false, [FunctionArgExpr::Expr(sql_expr)]) => {
2180                let expr = parse_sql_expr(sql_expr, self.ctx, self.active_schema)?;
2181                expr.count()
2182            },
2183            // COUNT(DISTINCT col)
2184            (true, [FunctionArgExpr::Expr(sql_expr)]) => {
2185                let expr = parse_sql_expr(sql_expr, self.ctx, self.active_schema)?;
2186                expr.clone().n_unique().sub(expr.null_count().gt(lit(0)))
2187            },
2188            _ => self.not_supported_error()?,
2189        };
2190        self.apply_window_spec(count_expr, &self.func.over)
2191    }
2192
2193    fn apply_order_by(&mut self, expr: Expr, order_by: &[OrderByExpr]) -> PolarsResult<Expr> {
2194        let mut by = Vec::with_capacity(order_by.len());
2195        let mut descending = Vec::with_capacity(order_by.len());
2196        let mut nulls_last = Vec::with_capacity(order_by.len());
2197
2198        for ob in order_by {
2199            // Note: if not specified 'NULLS FIRST' is default for DESC, 'NULLS LAST' otherwise
2200            // https://www.postgresql.org/docs/current/queries-order.html
2201            let desc_order = !ob.options.asc.unwrap_or(true);
2202            by.push(parse_sql_expr(&ob.expr, self.ctx, self.active_schema)?);
2203            nulls_last.push(!ob.options.nulls_first.unwrap_or(desc_order));
2204            descending.push(desc_order);
2205        }
2206        Ok(expr.sort_by(
2207            by,
2208            SortMultipleOptions::default()
2209                .with_order_descending_multi(descending)
2210                .with_nulls_last_multi(nulls_last)
2211                .with_maintain_order(true),
2212        ))
2213    }
2214
2215    fn apply_order_by_to_distinct_array(
2216        &mut self,
2217        expr: Expr,
2218        order_by: &[OrderByExpr],
2219        base_sql_expr: &SQLExpr,
2220    ) -> PolarsResult<Expr> {
2221        // If ORDER BY references the base expression, use .sort() directly
2222        if order_by.len() == 1 && order_by[0].expr == *base_sql_expr {
2223            let desc_order = !order_by[0].options.asc.unwrap_or(true);
2224            let nulls_last = !order_by[0].options.nulls_first.unwrap_or(desc_order);
2225            return Ok(expr.sort(
2226                SortOptions::default()
2227                    .with_order_descending(desc_order)
2228                    .with_nulls_last(nulls_last)
2229                    .with_maintain_order(true),
2230            ));
2231        }
2232        // Otherwise, fall back to `sort_by` (may need to handle further edge-cases later)
2233        self.apply_order_by(expr, order_by)
2234    }
2235
2236    /// Parse ORDER BY (in OVER clause), validating uniform direction.
2237    fn parse_order_by_in_window(
2238        &mut self,
2239        order_by: &[OrderByExpr],
2240    ) -> PolarsResult<(Vec<Expr>, bool)> {
2241        if order_by.is_empty() {
2242            return Ok((Vec::new(), false));
2243        }
2244        // Parse expressions and validate uniform direction
2245        let all_ascending = order_by[0].options.asc.unwrap_or(true);
2246        let mut exprs = Vec::with_capacity(order_by.len());
2247        for o in order_by {
2248            if all_ascending != o.options.asc.unwrap_or(true) {
2249                // TODO: mixed sort directions are not currently supported; we
2250                //  need to enhance `over_with_options` to take SortMultipleOptions
2251                polars_bail!(
2252                    SQLSyntax:
2253                    "OVER does not (yet) support mixed asc/desc directions for ORDER BY"
2254                )
2255            }
2256            let expr = parse_sql_expr(&o.expr, self.ctx, self.active_schema)?;
2257            exprs.push(expr);
2258        }
2259        Ok((exprs, !all_ascending))
2260    }
2261
2262    fn apply_window_spec(
2263        &mut self,
2264        expr: Expr,
2265        window_type: &Option<WindowType>,
2266    ) -> PolarsResult<Expr> {
2267        let Some(window_type) = window_type else {
2268            return Ok(expr);
2269        };
2270        let window_spec = self.resolve_window_spec(window_type)?;
2271        self.validate_window_frame(&window_spec.window_frame)?;
2272
2273        let partition_by = if window_spec.partition_by.is_empty() {
2274            None
2275        } else {
2276            Some(
2277                window_spec
2278                    .partition_by
2279                    .iter()
2280                    .map(|p| parse_sql_expr(p, self.ctx, self.active_schema))
2281                    .collect::<PolarsResult<Vec<_>>>()?,
2282            )
2283        };
2284        let order_by = if window_spec.order_by.is_empty() {
2285            None
2286        } else {
2287            let (order_exprs, all_desc) = self.parse_order_by_in_window(&window_spec.order_by)?;
2288            let sort_opts = SortOptions::default().with_order_descending(all_desc);
2289            Some((order_exprs, sort_opts))
2290        };
2291
2292        // Apply window spec
2293        Ok(match (partition_by, order_by) {
2294            (None, None) => expr,
2295            (Some(part), None) => expr.over(part),
2296            (part, Some(order)) => expr.over_with_options(part, Some(order), Default::default())?,
2297        })
2298    }
2299
2300    fn not_supported_error(&self) -> PolarsResult<Expr> {
2301        polars_bail!(
2302            SQLInterface:
2303            "no function matches the given name and arguments: `{}`",
2304            self.func.to_string()
2305        );
2306    }
2307}
2308
2309fn extract_args(func: &SQLFunction) -> PolarsResult<Vec<&FunctionArgExpr>> {
2310    let (args, _, _) = _extract_func_args(func, false, false)?;
2311    Ok(args)
2312}
2313
2314fn extract_args_distinct(func: &SQLFunction) -> PolarsResult<(Vec<&FunctionArgExpr>, bool)> {
2315    let (args, is_distinct, _) = _extract_func_args(func, true, false)?;
2316    Ok((args, is_distinct))
2317}
2318
2319fn extract_args_and_clauses(
2320    func: &SQLFunction,
2321) -> PolarsResult<(Vec<&FunctionArgExpr>, bool, Vec<FunctionArgumentClause>)> {
2322    _extract_func_args(func, true, true)
2323}
2324
2325fn _extract_func_args(
2326    func: &SQLFunction,
2327    get_distinct: bool,
2328    get_clauses: bool,
2329) -> PolarsResult<(Vec<&FunctionArgExpr>, bool, Vec<FunctionArgumentClause>)> {
2330    match &func.args {
2331        FunctionArguments::List(FunctionArgumentList {
2332            args,
2333            duplicate_treatment,
2334            clauses,
2335        }) => {
2336            let is_distinct = matches!(duplicate_treatment, Some(DuplicateTreatment::Distinct));
2337            if !(get_clauses || get_distinct) && is_distinct {
2338                polars_bail!(SQLSyntax: "unexpected use of DISTINCT found in '{}'", func.name)
2339            } else if !get_clauses && !clauses.is_empty() {
2340                polars_bail!(SQLSyntax: "unexpected clause found in '{}' ({})", func.name, clauses[0])
2341            } else {
2342                let unpacked_args = args
2343                    .iter()
2344                    .map(|arg| match arg {
2345                        FunctionArg::Named { arg, .. } => arg,
2346                        FunctionArg::ExprNamed { arg, .. } => arg,
2347                        FunctionArg::Unnamed(arg) => arg,
2348                    })
2349                    .collect();
2350                Ok((unpacked_args, is_distinct, clauses.clone()))
2351            }
2352        },
2353        FunctionArguments::Subquery { .. } => {
2354            Err(polars_err!(SQLInterface: "subquery not expected in {}", func.name))
2355        },
2356        FunctionArguments::None => Ok((vec![], false, vec![])),
2357    }
2358}
2359
2360pub(crate) trait FromSQLExpr {
2361    fn from_sql_expr(expr: &SQLExpr, ctx: &mut SQLContext) -> PolarsResult<Self>
2362    where
2363        Self: Sized;
2364}
2365
2366impl FromSQLExpr for f64 {
2367    fn from_sql_expr(expr: &SQLExpr, _ctx: &mut SQLContext) -> PolarsResult<Self>
2368    where
2369        Self: Sized,
2370    {
2371        match expr {
2372            SQLExpr::Value(ValueWithSpan { value: v, .. }) => match v {
2373                SQLValue::Number(s, _) => s
2374                    .parse()
2375                    .map_err(|_| polars_err!(SQLInterface: "cannot parse literal {:?}", s)),
2376                _ => polars_bail!(SQLInterface: "cannot parse literal {:?}", v),
2377            },
2378            _ => polars_bail!(SQLInterface: "cannot parse literal {:?}", expr),
2379        }
2380    }
2381}
2382
2383impl FromSQLExpr for bool {
2384    fn from_sql_expr(expr: &SQLExpr, _ctx: &mut SQLContext) -> PolarsResult<Self>
2385    where
2386        Self: Sized,
2387    {
2388        match expr {
2389            SQLExpr::Value(ValueWithSpan { value: v, .. }) => match v {
2390                SQLValue::Boolean(v) => Ok(*v),
2391                _ => polars_bail!(SQLInterface: "cannot parse boolean {:?}", v),
2392            },
2393            _ => polars_bail!(SQLInterface: "cannot parse boolean {:?}", expr),
2394        }
2395    }
2396}
2397
2398impl FromSQLExpr for String {
2399    fn from_sql_expr(expr: &SQLExpr, _: &mut SQLContext) -> PolarsResult<Self>
2400    where
2401        Self: Sized,
2402    {
2403        match expr {
2404            SQLExpr::Value(ValueWithSpan { value: v, .. }) => match v {
2405                SQLValue::SingleQuotedString(s) => Ok(s.clone()),
2406                _ => polars_bail!(SQLInterface: "cannot parse literal {:?}", v),
2407            },
2408            _ => polars_bail!(SQLInterface: "cannot parse literal {:?}", expr),
2409        }
2410    }
2411}
2412
2413impl FromSQLExpr for StrptimeOptions {
2414    fn from_sql_expr(expr: &SQLExpr, _: &mut SQLContext) -> PolarsResult<Self>
2415    where
2416        Self: Sized,
2417    {
2418        match expr {
2419            SQLExpr::Value(ValueWithSpan { value: v, .. }) => match v {
2420                SQLValue::SingleQuotedString(s) => Ok(StrptimeOptions {
2421                    format: Some(PlSmallStr::from_str(s)),
2422                    ..StrptimeOptions::default()
2423                }),
2424                _ => polars_bail!(SQLInterface: "cannot parse literal {:?}", v),
2425            },
2426            _ => polars_bail!(SQLInterface: "cannot parse literal {:?}", expr),
2427        }
2428    }
2429}
2430
2431impl FromSQLExpr for Expr {
2432    fn from_sql_expr(expr: &SQLExpr, ctx: &mut SQLContext) -> PolarsResult<Self>
2433    where
2434        Self: Sized,
2435    {
2436        parse_sql_expr(expr, ctx, None)
2437    }
2438}