Skip to main content

polars_core/chunked_array/ops/sort/
mod.rs

1mod arg_sort;
2
3pub mod arg_sort_multiple;
4
5pub mod arg_bottom_k;
6pub mod options;
7
8#[cfg(feature = "dtype-categorical")]
9mod categorical;
10
11use std::cmp::Ordering;
12
13pub(crate) use arg_sort::arg_sort_row_fmt;
14pub(crate) use arg_sort_multiple::argsort_multiple_row_fmt;
15use arrow::bitmap::{Bitmap, BitmapBuilder};
16use arrow::legacy::trusted_len::TrustedLenPush;
17use compare_inner::NonNull;
18use polars_buffer::Buffer;
19use polars_utils::nulls::IsNull;
20use polars_utils::sort::reorder_cmp;
21use rayon::prelude::*;
22pub use slice::*;
23
24use super::row_encode::_get_rows_encoded_ca;
25use crate::prelude::compare_inner::TotalOrdInner;
26use crate::prelude::sort::arg_sort_multiple::*;
27use crate::prelude::*;
28use crate::runtime::RAYON;
29use crate::series::IsSorted;
30use crate::utils::NoNull;
31
32fn partition_nulls<T: Copy>(
33    values: &mut [T],
34    mut validity: Option<Bitmap>,
35    options: SortOptions,
36) -> (&mut [T], Option<Bitmap>) {
37    let partitioned = if let Some(bitmap) = &validity {
38        // Partition null last first
39        let mut out_len = 0;
40        for idx in bitmap.true_idx_iter() {
41            unsafe { *values.get_unchecked_mut(out_len) = *values.get_unchecked(idx) };
42            out_len += 1;
43        }
44        let valid_count = out_len;
45        let null_count = values.len() - valid_count;
46        validity = Some(create_validity(
47            bitmap.len(),
48            bitmap.unset_bits(),
49            options.nulls_last,
50        ));
51
52        // Views are correctly partitioned.
53        if options.nulls_last {
54            &mut values[..valid_count]
55        }
56        // We need to swap the ends.
57        else {
58            // swap nulls with end
59            let mut end = values.len() - 1;
60
61            for i in 0..null_count {
62                unsafe { *values.get_unchecked_mut(end) = *values.get_unchecked(i) };
63                end = end.saturating_sub(1);
64            }
65            &mut values[null_count..]
66        }
67    } else {
68        values
69    };
70    (partitioned, validity)
71}
72
73pub(crate) fn sort_by_branch<T, C>(slice: &mut [T], descending: bool, cmp: C, parallel: bool)
74where
75    T: Send,
76    C: Send + Sync + Fn(&T, &T) -> Ordering,
77{
78    if parallel {
79        RAYON.install(|| match descending {
80            true => slice.par_sort_by(|a, b| cmp(b, a)),
81            false => slice.par_sort_by(cmp),
82        })
83    } else {
84        match descending {
85            true => slice.sort_by(|a, b| cmp(b, a)),
86            false => slice.sort_by(cmp),
87        }
88    }
89}
90
91fn sort_unstable_by_branch<T, C>(slice: &mut [T], options: SortOptions, cmp: C)
92where
93    T: Send,
94    C: Send + Sync + Fn(&T, &T) -> Ordering,
95{
96    if options.multithreaded {
97        RAYON.install(|| match options.descending {
98            true => slice.par_sort_unstable_by(|a, b| cmp(b, a)),
99            false => slice.par_sort_unstable_by(cmp),
100        })
101    } else {
102        match options.descending {
103            true => slice.sort_unstable_by(|a, b| cmp(b, a)),
104            false => slice.sort_unstable_by(cmp),
105        }
106    }
107}
108
109// Reduce monomorphisation.
110fn sort_impl_unstable<T>(vals: &mut [T], options: SortOptions)
111where
112    T: TotalOrd + Send + Sync,
113{
114    sort_unstable_by_branch(vals, options, TotalOrd::tot_cmp);
115}
116
117fn create_validity(len: usize, null_count: usize, nulls_last: bool) -> Bitmap {
118    let mut validity = BitmapBuilder::with_capacity(len);
119    if nulls_last {
120        validity.extend_constant(len - null_count, true);
121        validity.extend_constant(null_count, false);
122    } else {
123        validity.extend_constant(null_count, false);
124        validity.extend_constant(len - null_count, true);
125    }
126    validity.freeze()
127}
128
129macro_rules! sort_with_fast_path {
130    ($ca:ident, $options:expr) => {{
131        if $ca.is_empty() {
132            return $ca.clone();
133        }
134
135        // we can clone if we sort in same order
136        if $options.descending && $ca.is_sorted_descending_flag() || ($ca.is_sorted_ascending_flag() && !$options.descending) {
137            // there are nulls
138            if $ca.null_count() > 0 {
139                // if the nulls are already last we can clone
140                if $options.nulls_last && $ca.get($ca.len() - 1).is_none()  ||
141                // if the nulls are already first we can clone
142                (!$options.nulls_last && $ca.get(0).is_none())
143                {
144                    return $ca.clone();
145                }
146                // nulls are not at the right place
147                // continue w/ sorting
148                // TODO: we can optimize here and just put the null at the correct place
149            } else {
150                return $ca.clone();
151            }
152        }
153        // we can reverse if we sort in other order
154        else if ($options.descending && $ca.is_sorted_ascending_flag() || $ca.is_sorted_descending_flag()) && $ca.null_count() == 0 {
155            return $ca.reverse()
156        };
157
158
159    }}
160}
161
162macro_rules! arg_sort_fast_path {
163    ($ca:ident,  $options:expr) => {{
164        // if already sorted in required order we can just return 0..len
165        if $options.limit.is_none() &&
166        ($options.descending && $ca.is_sorted_descending_flag() || ($ca.is_sorted_ascending_flag() && !$options.descending)) {
167            // there are nulls
168            if $ca.null_count() > 0 {
169                // if the nulls are already last we can return 0..len
170                if ($options.nulls_last && $ca.get($ca.len() - 1).is_none() ) ||
171                // if the nulls are already first we can return 0..len
172                (! $options.nulls_last && $ca.get(0).is_none())
173                {
174                   return ChunkedArray::with_chunk($ca.name().clone(),
175                    IdxArr::from_data_default(Buffer::from((0..($ca.len() as IdxSize)).collect::<Vec<IdxSize>>()), None));
176                }
177                // nulls are not at the right place
178                // continue w/ sorting
179                // TODO: we can optimize here and just put the null at the correct place
180            } else {
181                // no nulls
182                return ChunkedArray::with_chunk($ca.name().clone(),
183                IdxArr::from_data_default(Buffer::from((0..($ca.len() as IdxSize )).collect::<Vec<IdxSize>>()), None));
184            }
185        }
186    }}
187}
188
189fn sort_with_numeric<T>(ca: &ChunkedArray<T>, options: SortOptions) -> ChunkedArray<T>
190where
191    T: PolarsNumericType,
192{
193    sort_with_fast_path!(ca, options);
194    if ca.null_count() == 0 {
195        let mut vals = ca.to_vec_null_aware().left().unwrap();
196
197        sort_impl_unstable(vals.as_mut_slice(), options);
198
199        let mut ca = ChunkedArray::from_vec(ca.name().clone(), vals);
200        let s = if options.descending {
201            IsSorted::Descending
202        } else {
203            IsSorted::Ascending
204        };
205        ca.set_sorted_flag(s);
206        ca
207    } else {
208        let null_count = ca.null_count();
209        let len = ca.len();
210
211        let mut vals = Vec::with_capacity(ca.len());
212
213        if !options.nulls_last {
214            let iter = std::iter::repeat_n(T::Native::default(), null_count);
215            vals.extend(iter);
216        }
217
218        ca.downcast_iter().for_each(|arr| {
219            let iter = arr.iter().filter_map(|v| v.copied());
220            vals.extend(iter);
221        });
222        let mut_slice = if options.nulls_last {
223            &mut vals[..len - null_count]
224        } else {
225            &mut vals[null_count..]
226        };
227
228        sort_impl_unstable(mut_slice, options);
229
230        if options.nulls_last {
231            vals.extend(std::iter::repeat_n(T::Native::default(), ca.null_count()));
232        }
233
234        let arr = PrimitiveArray::new(
235            T::get_static_dtype().to_arrow(CompatLevel::newest()),
236            vals.into(),
237            Some(create_validity(len, null_count, options.nulls_last)),
238        );
239        let mut new_ca = ChunkedArray::with_chunk(ca.name().clone(), arr);
240        let s = if options.descending {
241            IsSorted::Descending
242        } else {
243            IsSorted::Ascending
244        };
245        new_ca.set_sorted_flag(s);
246        new_ca
247    }
248}
249
250fn arg_sort_numeric<T>(ca: &ChunkedArray<T>, mut options: SortOptions) -> IdxCa
251where
252    T: PolarsNumericType,
253{
254    options.multithreaded &= RAYON.current_num_threads() > 1;
255    arg_sort_fast_path!(ca, options);
256    if ca.null_count() == 0 {
257        let iter = ca
258            .downcast_iter()
259            .map(|arr| arr.values().as_slice().iter().copied());
260        arg_sort::arg_sort_no_nulls(
261            ca.name().clone(),
262            iter,
263            options,
264            ca.len(),
265            ca.is_sorted_flag(),
266        )
267    } else {
268        let iter = ca
269            .downcast_iter()
270            .map(|arr| arr.iter().map(|opt| opt.copied()));
271        arg_sort::arg_sort(
272            ca.name().clone(),
273            iter,
274            options,
275            ca.null_count(),
276            ca.len(),
277            ca.is_sorted_flag(),
278            ca.get(0).is_none(),
279        )
280    }
281}
282
283fn arg_sort_multiple_numeric<T: PolarsNumericType>(
284    ca: &ChunkedArray<T>,
285    by: &[Column],
286    options: &SortMultipleOptions,
287) -> PolarsResult<IdxCa> {
288    args_validate(ca, by, &options.descending, "descending")?;
289    args_validate(ca, by, &options.nulls_last, "nulls_last")?;
290    let mut count: IdxSize = 0;
291
292    let no_nulls = ca.null_count() == 0;
293
294    if no_nulls {
295        let mut vals = Vec::with_capacity(ca.len());
296        for arr in ca.downcast_iter() {
297            vals.extend_trusted_len(arr.values().as_slice().iter().map(|v| {
298                let i = count;
299                count += 1;
300                (i, NonNull(*v))
301            }))
302        }
303        arg_sort_multiple_impl(vals, by, options)
304    } else {
305        let mut vals = Vec::with_capacity(ca.len());
306        for arr in ca.downcast_iter() {
307            vals.extend_trusted_len(arr.into_iter().map(|v| {
308                let i = count;
309                count += 1;
310                (i, v.copied())
311            }));
312        }
313        arg_sort_multiple_impl(vals, by, options)
314    }
315}
316
317impl<T> ChunkSort<T> for ChunkedArray<T>
318where
319    T: PolarsNumericType,
320{
321    fn sort_with(&self, mut options: SortOptions) -> ChunkedArray<T> {
322        options.multithreaded &= RAYON.current_num_threads() > 1;
323        sort_with_numeric(self, options)
324    }
325
326    fn sort(&self, descending: bool) -> ChunkedArray<T> {
327        self.sort_with(SortOptions {
328            descending,
329            ..Default::default()
330        })
331    }
332
333    fn arg_sort(&self, options: SortOptions) -> IdxCa {
334        arg_sort_numeric(self, options)
335    }
336
337    /// # Panics
338    ///
339    /// This function is very opinionated.
340    /// We assume that all numeric `Series` are of the same type, if not it will panic
341    fn arg_sort_multiple(
342        &self,
343        by: &[Column],
344        options: &SortMultipleOptions,
345    ) -> PolarsResult<IdxCa> {
346        arg_sort_multiple_numeric(self, by, options)
347    }
348}
349
350fn ordering_other_columns<'a>(
351    compare_inner: &'a [Box<dyn TotalOrdInner + 'a>],
352    descending: &[bool],
353    nulls_last: &[bool],
354    idx_a: usize,
355    idx_b: usize,
356) -> Ordering {
357    for ((cmp, descending), nulls_last) in compare_inner.iter().zip(descending).zip(nulls_last) {
358        // SAFETY: indices are in bounds
359        let ordering =
360            unsafe { (**cmp).cmp_element_unchecked(idx_a, idx_b, *descending, *nulls_last) };
361        if !ordering.is_eq() {
362            return ordering;
363        }
364    }
365    // all arrays/columns exhausted, ordering equal it is.
366    Ordering::Equal
367}
368
369impl ChunkSort<StringType> for StringChunked {
370    fn sort_with(&self, options: SortOptions) -> ChunkedArray<StringType> {
371        unsafe { self.as_binary().sort_with(options).to_string_unchecked() }
372    }
373
374    fn sort(&self, descending: bool) -> StringChunked {
375        self.sort_with(SortOptions {
376            descending,
377            nulls_last: false,
378            multithreaded: true,
379            maintain_order: false,
380            limit: None,
381        })
382    }
383
384    fn arg_sort(&self, options: SortOptions) -> IdxCa {
385        self.as_binary().arg_sort(options)
386    }
387
388    /// # Panics
389    ///
390    /// This function is very opinionated. On the implementation of `ChunkedArray<T>` for numeric types,
391    /// we assume that all numeric `Series` are of the same type.
392    ///
393    /// In this case we assume that all numeric `Series` are `f64` types. The caller needs to
394    /// uphold this contract. If not, it will panic.
395    ///
396    fn arg_sort_multiple(
397        &self,
398        by: &[Column],
399        options: &SortMultipleOptions,
400    ) -> PolarsResult<IdxCa> {
401        self.as_binary().arg_sort_multiple(by, options)
402    }
403}
404
405impl ChunkSort<BinaryType> for BinaryChunked {
406    fn sort_with(&self, mut options: SortOptions) -> ChunkedArray<BinaryType> {
407        options.multithreaded &= RAYON.current_num_threads() > 1;
408        sort_with_fast_path!(self, options);
409        // We will sort by the views and reconstruct with sorted views. We leave the buffers as is.
410        // We must rechunk to ensure that all views point into the proper buffers.
411        let ca = self.rechunk();
412        let arr = ca.downcast_as_array().clone();
413
414        let (views, buffers, validity, total_bytes_len, total_buffer_len) = arr.into_inner();
415        let mut views = views.to_vec();
416
417        let (partitioned_part, validity) = partition_nulls(&mut views, validity, options);
418
419        sort_unstable_by_branch(partitioned_part, options, |a, b| unsafe {
420            a.get_slice_unchecked(&buffers)
421                .tot_cmp(&b.get_slice_unchecked(&buffers))
422        });
423
424        let array = unsafe {
425            BinaryViewArray::new_unchecked(
426                ArrowDataType::BinaryView,
427                views.into(),
428                buffers,
429                validity,
430                total_bytes_len,
431                total_buffer_len,
432            )
433        };
434
435        let mut out = Self::with_chunk_like(self, array);
436
437        let s = if options.descending {
438            IsSorted::Descending
439        } else {
440            IsSorted::Ascending
441        };
442        out.set_sorted_flag(s);
443        out
444    }
445
446    fn sort(&self, descending: bool) -> ChunkedArray<BinaryType> {
447        self.sort_with(SortOptions {
448            descending,
449            nulls_last: false,
450            multithreaded: true,
451            maintain_order: false,
452            limit: None,
453        })
454    }
455
456    fn arg_sort(&self, options: SortOptions) -> IdxCa {
457        arg_sort_fast_path!(self, options);
458        if self.null_count() == 0 {
459            arg_sort::arg_sort_no_nulls(
460                self.name().clone(),
461                self.downcast_iter().map(|arr| arr.values_iter()),
462                options,
463                self.len(),
464                self.is_sorted_flag(),
465            )
466        } else {
467            arg_sort::arg_sort(
468                self.name().clone(),
469                self.downcast_iter().map(|arr| arr.iter()),
470                options,
471                self.null_count(),
472                self.len(),
473                self.is_sorted_flag(),
474                self.get(0).is_none(),
475            )
476        }
477    }
478
479    fn arg_sort_multiple(
480        &self,
481        by: &[Column],
482        options: &SortMultipleOptions,
483    ) -> PolarsResult<IdxCa> {
484        args_validate(self, by, &options.descending, "descending")?;
485        args_validate(self, by, &options.nulls_last, "nulls_last")?;
486        let mut count: IdxSize = 0;
487
488        let mut vals = Vec::with_capacity(self.len());
489        for arr in self.downcast_iter() {
490            for v in arr {
491                let i = count;
492                count += 1;
493                vals.push((i, v))
494            }
495        }
496
497        arg_sort_multiple_impl(vals, by, options)
498    }
499}
500
501impl ChunkSort<BinaryOffsetType> for BinaryOffsetChunked {
502    fn sort_with(&self, mut options: SortOptions) -> BinaryOffsetChunked {
503        options.multithreaded &= RAYON.current_num_threads() > 1;
504        sort_with_fast_path!(self, options);
505
506        let mut v: Vec<&[u8]> = Vec::with_capacity(self.len());
507        for arr in self.downcast_iter() {
508            v.extend(arr.non_null_values_iter());
509        }
510
511        sort_impl_unstable(v.as_mut_slice(), options);
512
513        let mut values = Vec::<u8>::with_capacity(self.get_values_size());
514        let mut offsets = Vec::<i64>::with_capacity(self.len() + 1);
515        let mut length_so_far = 0i64;
516        offsets.push(length_so_far);
517
518        let len = self.len();
519        let null_count = self.null_count();
520        let mut ca: Self = match (null_count, options.nulls_last) {
521            (0, _) => {
522                for val in v {
523                    values.extend_from_slice(val);
524                    length_so_far = values.len() as i64;
525                    offsets.push(length_so_far);
526                }
527                // SAFETY: offsets are correctly created.
528                let arr = unsafe {
529                    BinaryArray::from_data_unchecked_default(offsets.into(), values.into(), None)
530                };
531                ChunkedArray::with_chunk(self.name().clone(), arr)
532            },
533            (_, true) => {
534                for val in v {
535                    values.extend_from_slice(val);
536                    length_so_far = values.len() as i64;
537                    offsets.push(length_so_far);
538                }
539                offsets.extend(std::iter::repeat_n(length_so_far, null_count));
540
541                // SAFETY: offsets are correctly created.
542                let arr = unsafe {
543                    BinaryArray::from_data_unchecked_default(
544                        offsets.into(),
545                        values.into(),
546                        Some(create_validity(len, null_count, true)),
547                    )
548                };
549                ChunkedArray::with_chunk(self.name().clone(), arr)
550            },
551            (_, false) => {
552                offsets.extend(std::iter::repeat_n(length_so_far, null_count));
553
554                for val in v {
555                    values.extend_from_slice(val);
556                    length_so_far = values.len() as i64;
557                    offsets.push(length_so_far);
558                }
559
560                // SAFETY: we pass valid UTF-8.
561                let arr = unsafe {
562                    BinaryArray::from_data_unchecked_default(
563                        offsets.into(),
564                        values.into(),
565                        Some(create_validity(len, null_count, false)),
566                    )
567                };
568                ChunkedArray::with_chunk(self.name().clone(), arr)
569            },
570        };
571
572        let s = if options.descending {
573            IsSorted::Descending
574        } else {
575            IsSorted::Ascending
576        };
577        ca.set_sorted_flag(s);
578        ca
579    }
580
581    fn sort(&self, descending: bool) -> BinaryOffsetChunked {
582        self.sort_with(SortOptions {
583            descending,
584            nulls_last: false,
585            multithreaded: true,
586            maintain_order: false,
587            limit: None,
588        })
589    }
590
591    fn arg_sort(&self, mut options: SortOptions) -> IdxCa {
592        options.multithreaded &= RAYON.current_num_threads() > 1;
593        let ca = self.rechunk();
594        let arr = ca.downcast_as_array();
595        let mut idx = (0..(arr.len() as IdxSize)).collect::<Vec<_>>();
596
597        let argsort = |args| {
598            if options.maintain_order {
599                sort_by_branch(
600                    args,
601                    options.descending,
602                    |a, b| unsafe {
603                        let a = arr.value_unchecked(*a as usize);
604                        let b = arr.value_unchecked(*b as usize);
605                        a.tot_cmp(&b)
606                    },
607                    options.multithreaded,
608                );
609            } else {
610                sort_unstable_by_branch(args, options, |a, b| unsafe {
611                    let a = arr.value_unchecked(*a as usize);
612                    let b = arr.value_unchecked(*b as usize);
613                    a.tot_cmp(&b)
614                });
615            }
616        };
617
618        if self.null_count() == 0 {
619            argsort(&mut idx);
620            IdxCa::from_vec(self.name().clone(), idx)
621        } else {
622            // This branch (almost?) never gets called as the row-encoding also encodes nulls.
623            let (partitioned_part, validity) =
624                partition_nulls(&mut idx, arr.validity().cloned(), options);
625            argsort(partitioned_part);
626            IdxCa::with_chunk(
627                self.name().clone(),
628                IdxArr::from_data_default(idx.into(), validity),
629            )
630        }
631    }
632
633    /// # Panics
634    ///
635    /// This function is very opinionated. On the implementation of `ChunkedArray<T>` for numeric types,
636    /// we assume that all numeric `Series` are of the same type.
637    ///
638    /// In this case we assume that all numeric `Series` are `f64` types. The caller needs to
639    /// uphold this contract. If not, it will panic.
640    fn arg_sort_multiple(
641        &self,
642        by: &[Column],
643        options: &SortMultipleOptions,
644    ) -> PolarsResult<IdxCa> {
645        args_validate(self, by, &options.descending, "descending")?;
646        args_validate(self, by, &options.nulls_last, "nulls_last")?;
647        let mut count: IdxSize = 0;
648
649        let mut vals = Vec::with_capacity(self.len());
650        for arr in self.downcast_iter() {
651            for v in arr {
652                let i = count;
653                count += 1;
654                vals.push((i, v))
655            }
656        }
657
658        arg_sort_multiple_impl(vals, by, options)
659    }
660}
661
662#[cfg(feature = "dtype-struct")]
663impl ChunkSort<StructType> for StructChunked {
664    fn sort_with(&self, mut options: SortOptions) -> ChunkedArray<StructType> {
665        options.multithreaded &= RAYON.current_num_threads() > 1;
666        let idx = self.arg_sort(options);
667        let mut out = unsafe { self.take_unchecked(&idx) };
668
669        let s = if options.descending {
670            IsSorted::Descending
671        } else {
672            IsSorted::Ascending
673        };
674        out.set_sorted_flag(s);
675        out
676    }
677
678    fn sort(&self, descending: bool) -> ChunkedArray<StructType> {
679        self.sort_with(SortOptions::new().with_order_descending(descending))
680    }
681
682    fn arg_sort(&self, options: SortOptions) -> IdxCa {
683        let bin = self.get_row_encoded(options).unwrap();
684        bin.arg_sort(Default::default())
685    }
686}
687
688impl ChunkSort<ListType> for ListChunked {
689    fn sort_with(&self, mut options: SortOptions) -> ListChunked {
690        options.multithreaded &= RAYON.current_num_threads() > 1;
691        let idx = self.arg_sort(options);
692        let mut out = unsafe { self.take_unchecked(&idx) };
693
694        let s = if options.descending {
695            IsSorted::Descending
696        } else {
697            IsSorted::Ascending
698        };
699        out.set_sorted_flag(s);
700        out
701    }
702
703    fn sort(&self, descending: bool) -> ListChunked {
704        self.sort_with(SortOptions::new().with_order_descending(descending))
705    }
706
707    fn arg_sort(&self, options: SortOptions) -> IdxCa {
708        let bin = _get_rows_encoded_ca(
709            self.name().clone(),
710            &[self.clone().into_column()],
711            &[options.descending],
712            &[options.nulls_last],
713            false,
714        )
715        .unwrap();
716        bin.arg_sort(Default::default())
717    }
718}
719
720impl ChunkSort<BooleanType> for BooleanChunked {
721    fn sort_with(&self, mut options: SortOptions) -> ChunkedArray<BooleanType> {
722        options.multithreaded &= RAYON.current_num_threads() > 1;
723        sort_with_fast_path!(self, options);
724        let mut bitmap = BitmapBuilder::with_capacity(self.len());
725        let mut validity =
726            (self.null_count() > 0).then(|| BitmapBuilder::with_capacity(self.len()));
727
728        if self.null_count() > 0 && !options.nulls_last {
729            bitmap.extend_constant(self.null_count(), false);
730            if let Some(validity) = &mut validity {
731                validity.extend_constant(self.null_count(), false);
732            }
733        }
734
735        let n_valid = self.len() - self.null_count();
736        let n_set = self.sum().unwrap() as usize;
737        if options.descending {
738            bitmap.extend_constant(n_set, true);
739            bitmap.extend_constant(n_valid - n_set, false);
740        } else {
741            bitmap.extend_constant(n_valid - n_set, false);
742            bitmap.extend_constant(n_set, true);
743        }
744        if let Some(validity) = &mut validity {
745            validity.extend_constant(n_valid, true);
746        }
747
748        if self.null_count() > 0 && options.nulls_last {
749            bitmap.extend_constant(self.null_count(), false);
750            if let Some(validity) = &mut validity {
751                validity.extend_constant(self.null_count(), false);
752            }
753        }
754
755        let mut ca = Self::from_chunk_iter(
756            self.name().clone(),
757            Some(BooleanArray::from_data_default(
758                bitmap.freeze(),
759                validity.map(|v| v.freeze()),
760            )),
761        );
762        ca.set_sorted_flag(if options.descending {
763            IsSorted::Descending
764        } else {
765            IsSorted::Ascending
766        });
767        ca
768    }
769
770    fn sort(&self, descending: bool) -> BooleanChunked {
771        self.sort_with(SortOptions {
772            descending,
773            nulls_last: false,
774            multithreaded: true,
775            maintain_order: false,
776            limit: None,
777        })
778    }
779
780    fn arg_sort(&self, options: SortOptions) -> IdxCa {
781        arg_sort_fast_path!(self, options);
782        if self.null_count() == 0 {
783            arg_sort::arg_sort_no_nulls(
784                self.name().clone(),
785                self.downcast_iter().map(|arr| arr.values_iter()),
786                options,
787                self.len(),
788                self.is_sorted_flag(),
789            )
790        } else {
791            arg_sort::arg_sort(
792                self.name().clone(),
793                self.downcast_iter().map(|arr| arr.iter()),
794                options,
795                self.null_count(),
796                self.len(),
797                self.is_sorted_flag(),
798                self.get(0).is_none(),
799            )
800        }
801    }
802    fn arg_sort_multiple(
803        &self,
804        by: &[Column],
805        options: &SortMultipleOptions,
806    ) -> PolarsResult<IdxCa> {
807        let mut vals = Vec::with_capacity(self.len());
808        let mut count: IdxSize = 0;
809        for arr in self.downcast_iter() {
810            vals.extend_trusted_len(arr.into_iter().map(|v| {
811                let i = count;
812                count += 1;
813                (i, v.map(|v| v as u8))
814            }));
815        }
816        arg_sort_multiple_impl(vals, by, options)
817    }
818}
819
820pub fn _broadcast_bools(n_cols: usize, values: &mut Vec<bool>) {
821    if n_cols > values.len() && values.len() == 1 {
822        while n_cols != values.len() {
823            values.push(values[0]);
824        }
825    }
826}
827
828/// # Panics
829/// Panics if `columns` is empty.
830pub fn arg_sort(columns: &[Column], mut sort_options: SortMultipleOptions) -> PolarsResult<IdxCa> {
831    assert!(!columns.is_empty());
832
833    for column in columns {
834        if column.dtype().is_object() {
835            polars_bail!(
836                InvalidOperation: "column '{}' has a dtype of '{}', which does not support sorting", column.name(), column.dtype()
837            )
838        }
839    }
840
841    if let [c] = columns {
842        Ok(c.arg_sort(SortOptions {
843            descending: sort_options.descending[0],
844            nulls_last: sort_options.nulls_last[0],
845            multithreaded: sort_options.multithreaded,
846            maintain_order: sort_options.maintain_order,
847            limit: sort_options.limit,
848        }))
849    } else if sort_options.nulls_last.iter().all(|&x| x)
850        || columns.iter().any(|c| c.dtype().is_nested())
851        || std::env::var("POLARS_ROW_FMT_SORT").is_ok()
852    {
853        argsort_multiple_row_fmt(
854            columns,
855            sort_options.descending,
856            sort_options.nulls_last,
857            sort_options.multithreaded,
858        )
859    } else {
860        let n_cols = columns.len();
861
862        _broadcast_bools(n_cols, &mut sort_options.descending);
863        _broadcast_bools(n_cols, &mut sort_options.nulls_last);
864
865        columns[0]
866            .clone()
867            .as_materialized_series()
868            .arg_sort_multiple(&columns[1..], &sort_options)
869    }
870}
871
872/// This is a perfect sort particularly useful for an arg_sort of an arg_sort
873/// The second arg_sort sorts indices from `0` to `len` so can be just assigned to the
874/// new index location.
875///
876/// Besides that we know that all indices are unique and thus not alias so we can parallelize.
877///
878/// This sort does not sort in place and will allocate.
879///
880/// - The right indices are used for sorting
881/// - The left indices are placed at the location right points to.
882///
883/// # Safety
884/// The caller must ensure that the right indexes for `&[(_, IdxSize)]` are integers ranging from `0..idx.len`
885pub unsafe fn perfect_sort(idx: &[(IdxSize, IdxSize)], out: &mut Vec<IdxSize>) {
886    let chunk_size = std::cmp::max(
887        idx.len() / RAYON.current_num_threads(),
888        RAYON.current_num_threads(),
889    );
890
891    out.reserve(idx.len());
892    let ptr = out.as_mut_ptr() as *const IdxSize as usize;
893
894    RAYON.install(|| {
895        idx.par_chunks(chunk_size).for_each(|indices| {
896            let ptr = ptr as *mut IdxSize;
897            for (idx_val, idx_location) in indices {
898                // SAFETY:
899                // idx_location is in bounds by invariant of this function
900                // and we ensured we have at least `idx.len()` capacity
901                unsafe { *ptr.add(*idx_location as usize) = *idx_val };
902            }
903        });
904    });
905    // SAFETY:
906    // all elements are written
907    unsafe { out.set_len(idx.len()) };
908}
909
910#[cfg(test)]
911mod test {
912    use crate::prelude::*;
913    #[test]
914    fn test_arg_sort() {
915        let a = Int32Chunked::new(
916            PlSmallStr::from_static("a"),
917            &[
918                Some(1), // 0
919                Some(5), // 1
920                None,    // 2
921                Some(1), // 3
922                None,    // 4
923                Some(4), // 5
924                Some(3), // 6
925                Some(1), // 7
926            ],
927        );
928        let idx = a.arg_sort(SortOptions {
929            descending: false,
930            ..Default::default()
931        });
932        let idx = idx.cont_slice().unwrap();
933
934        let expected = [2, 4, 0, 3, 7, 6, 5, 1];
935        assert_eq!(idx, expected);
936
937        let idx = a.arg_sort(SortOptions {
938            descending: true,
939            ..Default::default()
940        });
941        let idx = idx.cont_slice().unwrap();
942        // the duplicates are in reverse order of appearance, so we cannot reverse expected
943        let expected = [2, 4, 1, 5, 6, 0, 3, 7];
944        assert_eq!(idx, expected);
945    }
946
947    #[test]
948    fn test_sort() {
949        let a = Int32Chunked::new(
950            PlSmallStr::from_static("a"),
951            &[
952                Some(1),
953                Some(5),
954                None,
955                Some(1),
956                None,
957                Some(4),
958                Some(3),
959                Some(1),
960            ],
961        );
962        let out = a.sort_with(SortOptions {
963            descending: false,
964            nulls_last: false,
965            multithreaded: true,
966            maintain_order: false,
967            limit: None,
968        });
969        assert_eq!(
970            Vec::from(&out),
971            &[
972                None,
973                None,
974                Some(1),
975                Some(1),
976                Some(1),
977                Some(3),
978                Some(4),
979                Some(5)
980            ]
981        );
982        let out = a.sort_with(SortOptions {
983            descending: false,
984            nulls_last: true,
985            multithreaded: true,
986            maintain_order: false,
987            limit: None,
988        });
989        assert_eq!(
990            Vec::from(&out),
991            &[
992                Some(1),
993                Some(1),
994                Some(1),
995                Some(3),
996                Some(4),
997                Some(5),
998                None,
999                None
1000            ]
1001        );
1002        let b = BooleanChunked::new(
1003            PlSmallStr::from_static("b"),
1004            &[Some(false), Some(true), Some(false)],
1005        );
1006        let out = b.sort_with(SortOptions::default().with_order_descending(true));
1007        assert_eq!(Vec::from(&out), &[Some(true), Some(false), Some(false)]);
1008        let out = b.sort_with(SortOptions::default().with_order_descending(false));
1009        assert_eq!(Vec::from(&out), &[Some(false), Some(false), Some(true)]);
1010    }
1011
1012    #[test]
1013    #[cfg_attr(miri, ignore)]
1014    fn test_arg_sort_multiple() -> PolarsResult<()> {
1015        let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 1, 1, 3, 4, 3, 3]);
1016        let b = Int64Chunked::new(PlSmallStr::from_static("b"), &[0, 1, 2, 3, 4, 5, 6, 1]);
1017        let c = StringChunked::new(
1018            PlSmallStr::from_static("c"),
1019            &["a", "b", "c", "d", "e", "f", "g", "h"],
1020        );
1021        let df = DataFrame::new_infer_height(vec![
1022            a.into_series().into(),
1023            b.into_series().into(),
1024            c.into_series().into(),
1025        ])?;
1026
1027        let out = df.sort(["a", "b", "c"], SortMultipleOptions::default())?;
1028        assert_eq!(
1029            Vec::from(out.column("b")?.as_series().unwrap().i64()?),
1030            &[
1031                Some(0),
1032                Some(2),
1033                Some(3),
1034                Some(1),
1035                Some(1),
1036                Some(4),
1037                Some(6),
1038                Some(5)
1039            ]
1040        );
1041
1042        // now let the first sort be a string
1043        let a = StringChunked::new(
1044            PlSmallStr::from_static("a"),
1045            &["a", "b", "c", "a", "b", "c"],
1046        )
1047        .into_series();
1048        let b = Int32Chunked::new(PlSmallStr::from_static("b"), &[5, 4, 2, 3, 4, 5]).into_series();
1049        let df = DataFrame::new_infer_height(vec![a.into(), b.into()])?;
1050
1051        let out = df.sort(["a", "b"], SortMultipleOptions::default())?;
1052        let expected = df!(
1053            "a" => ["a", "a", "b", "b", "c", "c"],
1054            "b" => [3, 5, 4, 4, 2, 5]
1055        )?;
1056        assert!(out.equals(&expected));
1057
1058        let df = df!(
1059            "groups" => [1, 2, 3],
1060            "values" => ["a", "a", "b"]
1061        )?;
1062
1063        let out = df.sort(
1064            ["groups", "values"],
1065            SortMultipleOptions::default().with_order_descending_multi([true, false]),
1066        )?;
1067        let expected = df!(
1068            "groups" => [3, 2, 1],
1069            "values" => ["b", "a", "a"]
1070        )?;
1071        assert!(out.equals(&expected));
1072
1073        let out = df.sort(
1074            ["values", "groups"],
1075            SortMultipleOptions::default().with_order_descending_multi([false, true]),
1076        )?;
1077        let expected = df!(
1078            "groups" => [2, 1, 3],
1079            "values" => ["a", "a", "b"]
1080        )?;
1081        assert!(out.equals(&expected));
1082
1083        Ok(())
1084    }
1085
1086    #[test]
1087    fn test_sort_string() {
1088        let ca = StringChunked::new(
1089            PlSmallStr::from_static("a"),
1090            &[Some("a"), None, Some("c"), None, Some("b")],
1091        );
1092        let out = ca.sort_with(SortOptions {
1093            descending: false,
1094            nulls_last: false,
1095            multithreaded: true,
1096            maintain_order: false,
1097            limit: None,
1098        });
1099        let expected = &[None, None, Some("a"), Some("b"), Some("c")];
1100        assert_eq!(Vec::from(&out), expected);
1101
1102        let out = ca.sort_with(SortOptions {
1103            descending: true,
1104            nulls_last: false,
1105            multithreaded: true,
1106            maintain_order: false,
1107            limit: None,
1108        });
1109
1110        let expected = &[None, None, Some("c"), Some("b"), Some("a")];
1111        assert_eq!(Vec::from(&out), expected);
1112
1113        let out = ca.sort_with(SortOptions {
1114            descending: false,
1115            nulls_last: true,
1116            multithreaded: true,
1117            maintain_order: false,
1118            limit: None,
1119        });
1120        let expected = &[Some("a"), Some("b"), Some("c"), None, None];
1121        assert_eq!(Vec::from(&out), expected);
1122
1123        let out = ca.sort_with(SortOptions {
1124            descending: true,
1125            nulls_last: true,
1126            multithreaded: true,
1127            maintain_order: false,
1128            limit: None,
1129        });
1130        let expected = &[Some("c"), Some("b"), Some("a"), None, None];
1131        assert_eq!(Vec::from(&out), expected);
1132
1133        // no nulls
1134        let ca = StringChunked::new(
1135            PlSmallStr::from_static("a"),
1136            &[Some("a"), Some("c"), Some("b")],
1137        );
1138        let out = ca.sort(false);
1139        let expected = &[Some("a"), Some("b"), Some("c")];
1140        assert_eq!(Vec::from(&out), expected);
1141
1142        let out = ca.sort(true);
1143        let expected = &[Some("c"), Some("b"), Some("a")];
1144        assert_eq!(Vec::from(&out), expected);
1145    }
1146}