1mod arg_sort;
2
3pub mod arg_sort_multiple;
4
5pub mod arg_bottom_k;
6pub mod options;
7
8#[cfg(feature = "dtype-categorical")]
9mod categorical;
10
11use std::cmp::Ordering;
12
13pub(crate) use arg_sort::arg_sort_row_fmt;
14pub(crate) use arg_sort_multiple::argsort_multiple_row_fmt;
15use arrow::bitmap::{Bitmap, BitmapBuilder};
16use arrow::legacy::trusted_len::TrustedLenPush;
17use compare_inner::NonNull;
18use polars_buffer::Buffer;
19use polars_utils::nulls::IsNull;
20use polars_utils::sort::reorder_cmp;
21use rayon::prelude::*;
22pub use slice::*;
23
24use super::row_encode::_get_rows_encoded_ca;
25use crate::prelude::compare_inner::TotalOrdInner;
26use crate::prelude::sort::arg_sort_multiple::*;
27use crate::prelude::*;
28use crate::runtime::RAYON;
29use crate::series::IsSorted;
30use crate::utils::NoNull;
31
32fn partition_nulls<T: Copy>(
33 values: &mut [T],
34 mut validity: Option<Bitmap>,
35 options: SortOptions,
36) -> (&mut [T], Option<Bitmap>) {
37 let partitioned = if let Some(bitmap) = &validity {
38 let mut out_len = 0;
40 for idx in bitmap.true_idx_iter() {
41 unsafe { *values.get_unchecked_mut(out_len) = *values.get_unchecked(idx) };
42 out_len += 1;
43 }
44 let valid_count = out_len;
45 let null_count = values.len() - valid_count;
46 validity = Some(create_validity(
47 bitmap.len(),
48 bitmap.unset_bits(),
49 options.nulls_last,
50 ));
51
52 if options.nulls_last {
54 &mut values[..valid_count]
55 }
56 else {
58 let mut end = values.len() - 1;
60
61 for i in 0..null_count {
62 unsafe { *values.get_unchecked_mut(end) = *values.get_unchecked(i) };
63 end = end.saturating_sub(1);
64 }
65 &mut values[null_count..]
66 }
67 } else {
68 values
69 };
70 (partitioned, validity)
71}
72
73pub(crate) fn sort_by_branch<T, C>(slice: &mut [T], descending: bool, cmp: C, parallel: bool)
74where
75 T: Send,
76 C: Send + Sync + Fn(&T, &T) -> Ordering,
77{
78 if parallel {
79 RAYON.install(|| match descending {
80 true => slice.par_sort_by(|a, b| cmp(b, a)),
81 false => slice.par_sort_by(cmp),
82 })
83 } else {
84 match descending {
85 true => slice.sort_by(|a, b| cmp(b, a)),
86 false => slice.sort_by(cmp),
87 }
88 }
89}
90
91fn sort_unstable_by_branch<T, C>(slice: &mut [T], options: SortOptions, cmp: C)
92where
93 T: Send,
94 C: Send + Sync + Fn(&T, &T) -> Ordering,
95{
96 if options.multithreaded {
97 RAYON.install(|| match options.descending {
98 true => slice.par_sort_unstable_by(|a, b| cmp(b, a)),
99 false => slice.par_sort_unstable_by(cmp),
100 })
101 } else {
102 match options.descending {
103 true => slice.sort_unstable_by(|a, b| cmp(b, a)),
104 false => slice.sort_unstable_by(cmp),
105 }
106 }
107}
108
109fn sort_impl_unstable<T>(vals: &mut [T], options: SortOptions)
111where
112 T: TotalOrd + Send + Sync,
113{
114 sort_unstable_by_branch(vals, options, TotalOrd::tot_cmp);
115}
116
117fn create_validity(len: usize, null_count: usize, nulls_last: bool) -> Bitmap {
118 let mut validity = BitmapBuilder::with_capacity(len);
119 if nulls_last {
120 validity.extend_constant(len - null_count, true);
121 validity.extend_constant(null_count, false);
122 } else {
123 validity.extend_constant(null_count, false);
124 validity.extend_constant(len - null_count, true);
125 }
126 validity.freeze()
127}
128
129macro_rules! sort_with_fast_path {
130 ($ca:ident, $options:expr) => {{
131 if $ca.is_empty() {
132 return $ca.clone();
133 }
134
135 if $options.descending && $ca.is_sorted_descending_flag() || ($ca.is_sorted_ascending_flag() && !$options.descending) {
137 if $ca.null_count() > 0 {
139 if $options.nulls_last && $ca.get($ca.len() - 1).is_none() ||
141 (!$options.nulls_last && $ca.get(0).is_none())
143 {
144 return $ca.clone();
145 }
146 } else {
150 return $ca.clone();
151 }
152 }
153 else if ($options.descending && $ca.is_sorted_ascending_flag() || $ca.is_sorted_descending_flag()) && $ca.null_count() == 0 {
155 return $ca.reverse()
156 };
157
158
159 }}
160}
161
162macro_rules! arg_sort_fast_path {
163 ($ca:ident, $options:expr) => {{
164 if $options.limit.is_none() &&
166 ($options.descending && $ca.is_sorted_descending_flag() || ($ca.is_sorted_ascending_flag() && !$options.descending)) {
167 if $ca.null_count() > 0 {
169 if ($options.nulls_last && $ca.get($ca.len() - 1).is_none() ) ||
171 (! $options.nulls_last && $ca.get(0).is_none())
173 {
174 return ChunkedArray::with_chunk($ca.name().clone(),
175 IdxArr::from_data_default(Buffer::from((0..($ca.len() as IdxSize)).collect::<Vec<IdxSize>>()), None));
176 }
177 } else {
181 return ChunkedArray::with_chunk($ca.name().clone(),
183 IdxArr::from_data_default(Buffer::from((0..($ca.len() as IdxSize )).collect::<Vec<IdxSize>>()), None));
184 }
185 }
186 }}
187}
188
189fn sort_with_numeric<T>(ca: &ChunkedArray<T>, options: SortOptions) -> ChunkedArray<T>
190where
191 T: PolarsNumericType,
192{
193 sort_with_fast_path!(ca, options);
194 if ca.null_count() == 0 {
195 let mut vals = ca.to_vec_null_aware().left().unwrap();
196
197 sort_impl_unstable(vals.as_mut_slice(), options);
198
199 let mut ca = ChunkedArray::from_vec(ca.name().clone(), vals);
200 let s = if options.descending {
201 IsSorted::Descending
202 } else {
203 IsSorted::Ascending
204 };
205 ca.set_sorted_flag(s);
206 ca
207 } else {
208 let null_count = ca.null_count();
209 let len = ca.len();
210
211 let mut vals = Vec::with_capacity(ca.len());
212
213 if !options.nulls_last {
214 let iter = std::iter::repeat_n(T::Native::default(), null_count);
215 vals.extend(iter);
216 }
217
218 ca.downcast_iter().for_each(|arr| {
219 let iter = arr.iter().filter_map(|v| v.copied());
220 vals.extend(iter);
221 });
222 let mut_slice = if options.nulls_last {
223 &mut vals[..len - null_count]
224 } else {
225 &mut vals[null_count..]
226 };
227
228 sort_impl_unstable(mut_slice, options);
229
230 if options.nulls_last {
231 vals.extend(std::iter::repeat_n(T::Native::default(), ca.null_count()));
232 }
233
234 let arr = PrimitiveArray::new(
235 T::get_static_dtype().to_arrow(CompatLevel::newest()),
236 vals.into(),
237 Some(create_validity(len, null_count, options.nulls_last)),
238 );
239 let mut new_ca = ChunkedArray::with_chunk(ca.name().clone(), arr);
240 let s = if options.descending {
241 IsSorted::Descending
242 } else {
243 IsSorted::Ascending
244 };
245 new_ca.set_sorted_flag(s);
246 new_ca
247 }
248}
249
250fn arg_sort_numeric<T>(ca: &ChunkedArray<T>, mut options: SortOptions) -> IdxCa
251where
252 T: PolarsNumericType,
253{
254 options.multithreaded &= RAYON.current_num_threads() > 1;
255 arg_sort_fast_path!(ca, options);
256 if ca.null_count() == 0 {
257 let iter = ca
258 .downcast_iter()
259 .map(|arr| arr.values().as_slice().iter().copied());
260 arg_sort::arg_sort_no_nulls(
261 ca.name().clone(),
262 iter,
263 options,
264 ca.len(),
265 ca.is_sorted_flag(),
266 )
267 } else {
268 let iter = ca
269 .downcast_iter()
270 .map(|arr| arr.iter().map(|opt| opt.copied()));
271 arg_sort::arg_sort(
272 ca.name().clone(),
273 iter,
274 options,
275 ca.null_count(),
276 ca.len(),
277 ca.is_sorted_flag(),
278 ca.get(0).is_none(),
279 )
280 }
281}
282
283fn arg_sort_multiple_numeric<T: PolarsNumericType>(
284 ca: &ChunkedArray<T>,
285 by: &[Column],
286 options: &SortMultipleOptions,
287) -> PolarsResult<IdxCa> {
288 args_validate(ca, by, &options.descending, "descending")?;
289 args_validate(ca, by, &options.nulls_last, "nulls_last")?;
290 let mut count: IdxSize = 0;
291
292 let no_nulls = ca.null_count() == 0;
293
294 if no_nulls {
295 let mut vals = Vec::with_capacity(ca.len());
296 for arr in ca.downcast_iter() {
297 vals.extend_trusted_len(arr.values().as_slice().iter().map(|v| {
298 let i = count;
299 count += 1;
300 (i, NonNull(*v))
301 }))
302 }
303 arg_sort_multiple_impl(vals, by, options)
304 } else {
305 let mut vals = Vec::with_capacity(ca.len());
306 for arr in ca.downcast_iter() {
307 vals.extend_trusted_len(arr.into_iter().map(|v| {
308 let i = count;
309 count += 1;
310 (i, v.copied())
311 }));
312 }
313 arg_sort_multiple_impl(vals, by, options)
314 }
315}
316
317impl<T> ChunkSort<T> for ChunkedArray<T>
318where
319 T: PolarsNumericType,
320{
321 fn sort_with(&self, mut options: SortOptions) -> ChunkedArray<T> {
322 options.multithreaded &= RAYON.current_num_threads() > 1;
323 sort_with_numeric(self, options)
324 }
325
326 fn sort(&self, descending: bool) -> ChunkedArray<T> {
327 self.sort_with(SortOptions {
328 descending,
329 ..Default::default()
330 })
331 }
332
333 fn arg_sort(&self, options: SortOptions) -> IdxCa {
334 arg_sort_numeric(self, options)
335 }
336
337 fn arg_sort_multiple(
342 &self,
343 by: &[Column],
344 options: &SortMultipleOptions,
345 ) -> PolarsResult<IdxCa> {
346 arg_sort_multiple_numeric(self, by, options)
347 }
348}
349
350fn ordering_other_columns<'a>(
351 compare_inner: &'a [Box<dyn TotalOrdInner + 'a>],
352 descending: &[bool],
353 nulls_last: &[bool],
354 idx_a: usize,
355 idx_b: usize,
356) -> Ordering {
357 for ((cmp, descending), nulls_last) in compare_inner.iter().zip(descending).zip(nulls_last) {
358 let ordering =
360 unsafe { (**cmp).cmp_element_unchecked(idx_a, idx_b, *descending, *nulls_last) };
361 if !ordering.is_eq() {
362 return ordering;
363 }
364 }
365 Ordering::Equal
367}
368
369impl ChunkSort<StringType> for StringChunked {
370 fn sort_with(&self, options: SortOptions) -> ChunkedArray<StringType> {
371 unsafe { self.as_binary().sort_with(options).to_string_unchecked() }
372 }
373
374 fn sort(&self, descending: bool) -> StringChunked {
375 self.sort_with(SortOptions {
376 descending,
377 nulls_last: false,
378 multithreaded: true,
379 maintain_order: false,
380 limit: None,
381 })
382 }
383
384 fn arg_sort(&self, options: SortOptions) -> IdxCa {
385 self.as_binary().arg_sort(options)
386 }
387
388 fn arg_sort_multiple(
397 &self,
398 by: &[Column],
399 options: &SortMultipleOptions,
400 ) -> PolarsResult<IdxCa> {
401 self.as_binary().arg_sort_multiple(by, options)
402 }
403}
404
405impl ChunkSort<BinaryType> for BinaryChunked {
406 fn sort_with(&self, mut options: SortOptions) -> ChunkedArray<BinaryType> {
407 options.multithreaded &= RAYON.current_num_threads() > 1;
408 sort_with_fast_path!(self, options);
409 let ca = self.rechunk();
412 let arr = ca.downcast_as_array().clone();
413
414 let (views, buffers, validity, total_bytes_len, total_buffer_len) = arr.into_inner();
415 let mut views = views.to_vec();
416
417 let (partitioned_part, validity) = partition_nulls(&mut views, validity, options);
418
419 sort_unstable_by_branch(partitioned_part, options, |a, b| unsafe {
420 a.get_slice_unchecked(&buffers)
421 .tot_cmp(&b.get_slice_unchecked(&buffers))
422 });
423
424 let array = unsafe {
425 BinaryViewArray::new_unchecked(
426 ArrowDataType::BinaryView,
427 views.into(),
428 buffers,
429 validity,
430 total_bytes_len,
431 total_buffer_len,
432 )
433 };
434
435 let mut out = Self::with_chunk_like(self, array);
436
437 let s = if options.descending {
438 IsSorted::Descending
439 } else {
440 IsSorted::Ascending
441 };
442 out.set_sorted_flag(s);
443 out
444 }
445
446 fn sort(&self, descending: bool) -> ChunkedArray<BinaryType> {
447 self.sort_with(SortOptions {
448 descending,
449 nulls_last: false,
450 multithreaded: true,
451 maintain_order: false,
452 limit: None,
453 })
454 }
455
456 fn arg_sort(&self, options: SortOptions) -> IdxCa {
457 arg_sort_fast_path!(self, options);
458 if self.null_count() == 0 {
459 arg_sort::arg_sort_no_nulls(
460 self.name().clone(),
461 self.downcast_iter().map(|arr| arr.values_iter()),
462 options,
463 self.len(),
464 self.is_sorted_flag(),
465 )
466 } else {
467 arg_sort::arg_sort(
468 self.name().clone(),
469 self.downcast_iter().map(|arr| arr.iter()),
470 options,
471 self.null_count(),
472 self.len(),
473 self.is_sorted_flag(),
474 self.get(0).is_none(),
475 )
476 }
477 }
478
479 fn arg_sort_multiple(
480 &self,
481 by: &[Column],
482 options: &SortMultipleOptions,
483 ) -> PolarsResult<IdxCa> {
484 args_validate(self, by, &options.descending, "descending")?;
485 args_validate(self, by, &options.nulls_last, "nulls_last")?;
486 let mut count: IdxSize = 0;
487
488 let mut vals = Vec::with_capacity(self.len());
489 for arr in self.downcast_iter() {
490 for v in arr {
491 let i = count;
492 count += 1;
493 vals.push((i, v))
494 }
495 }
496
497 arg_sort_multiple_impl(vals, by, options)
498 }
499}
500
501impl ChunkSort<BinaryOffsetType> for BinaryOffsetChunked {
502 fn sort_with(&self, mut options: SortOptions) -> BinaryOffsetChunked {
503 options.multithreaded &= RAYON.current_num_threads() > 1;
504 sort_with_fast_path!(self, options);
505
506 let mut v: Vec<&[u8]> = Vec::with_capacity(self.len());
507 for arr in self.downcast_iter() {
508 v.extend(arr.non_null_values_iter());
509 }
510
511 sort_impl_unstable(v.as_mut_slice(), options);
512
513 let mut values = Vec::<u8>::with_capacity(self.get_values_size());
514 let mut offsets = Vec::<i64>::with_capacity(self.len() + 1);
515 let mut length_so_far = 0i64;
516 offsets.push(length_so_far);
517
518 let len = self.len();
519 let null_count = self.null_count();
520 let mut ca: Self = match (null_count, options.nulls_last) {
521 (0, _) => {
522 for val in v {
523 values.extend_from_slice(val);
524 length_so_far = values.len() as i64;
525 offsets.push(length_so_far);
526 }
527 let arr = unsafe {
529 BinaryArray::from_data_unchecked_default(offsets.into(), values.into(), None)
530 };
531 ChunkedArray::with_chunk(self.name().clone(), arr)
532 },
533 (_, true) => {
534 for val in v {
535 values.extend_from_slice(val);
536 length_so_far = values.len() as i64;
537 offsets.push(length_so_far);
538 }
539 offsets.extend(std::iter::repeat_n(length_so_far, null_count));
540
541 let arr = unsafe {
543 BinaryArray::from_data_unchecked_default(
544 offsets.into(),
545 values.into(),
546 Some(create_validity(len, null_count, true)),
547 )
548 };
549 ChunkedArray::with_chunk(self.name().clone(), arr)
550 },
551 (_, false) => {
552 offsets.extend(std::iter::repeat_n(length_so_far, null_count));
553
554 for val in v {
555 values.extend_from_slice(val);
556 length_so_far = values.len() as i64;
557 offsets.push(length_so_far);
558 }
559
560 let arr = unsafe {
562 BinaryArray::from_data_unchecked_default(
563 offsets.into(),
564 values.into(),
565 Some(create_validity(len, null_count, false)),
566 )
567 };
568 ChunkedArray::with_chunk(self.name().clone(), arr)
569 },
570 };
571
572 let s = if options.descending {
573 IsSorted::Descending
574 } else {
575 IsSorted::Ascending
576 };
577 ca.set_sorted_flag(s);
578 ca
579 }
580
581 fn sort(&self, descending: bool) -> BinaryOffsetChunked {
582 self.sort_with(SortOptions {
583 descending,
584 nulls_last: false,
585 multithreaded: true,
586 maintain_order: false,
587 limit: None,
588 })
589 }
590
591 fn arg_sort(&self, mut options: SortOptions) -> IdxCa {
592 options.multithreaded &= RAYON.current_num_threads() > 1;
593 let ca = self.rechunk();
594 let arr = ca.downcast_as_array();
595 let mut idx = (0..(arr.len() as IdxSize)).collect::<Vec<_>>();
596
597 let argsort = |args| {
598 if options.maintain_order {
599 sort_by_branch(
600 args,
601 options.descending,
602 |a, b| unsafe {
603 let a = arr.value_unchecked(*a as usize);
604 let b = arr.value_unchecked(*b as usize);
605 a.tot_cmp(&b)
606 },
607 options.multithreaded,
608 );
609 } else {
610 sort_unstable_by_branch(args, options, |a, b| unsafe {
611 let a = arr.value_unchecked(*a as usize);
612 let b = arr.value_unchecked(*b as usize);
613 a.tot_cmp(&b)
614 });
615 }
616 };
617
618 if self.null_count() == 0 {
619 argsort(&mut idx);
620 IdxCa::from_vec(self.name().clone(), idx)
621 } else {
622 let (partitioned_part, validity) =
624 partition_nulls(&mut idx, arr.validity().cloned(), options);
625 argsort(partitioned_part);
626 IdxCa::with_chunk(
627 self.name().clone(),
628 IdxArr::from_data_default(idx.into(), validity),
629 )
630 }
631 }
632
633 fn arg_sort_multiple(
641 &self,
642 by: &[Column],
643 options: &SortMultipleOptions,
644 ) -> PolarsResult<IdxCa> {
645 args_validate(self, by, &options.descending, "descending")?;
646 args_validate(self, by, &options.nulls_last, "nulls_last")?;
647 let mut count: IdxSize = 0;
648
649 let mut vals = Vec::with_capacity(self.len());
650 for arr in self.downcast_iter() {
651 for v in arr {
652 let i = count;
653 count += 1;
654 vals.push((i, v))
655 }
656 }
657
658 arg_sort_multiple_impl(vals, by, options)
659 }
660}
661
662#[cfg(feature = "dtype-struct")]
663impl ChunkSort<StructType> for StructChunked {
664 fn sort_with(&self, mut options: SortOptions) -> ChunkedArray<StructType> {
665 options.multithreaded &= RAYON.current_num_threads() > 1;
666 let idx = self.arg_sort(options);
667 let mut out = unsafe { self.take_unchecked(&idx) };
668
669 let s = if options.descending {
670 IsSorted::Descending
671 } else {
672 IsSorted::Ascending
673 };
674 out.set_sorted_flag(s);
675 out
676 }
677
678 fn sort(&self, descending: bool) -> ChunkedArray<StructType> {
679 self.sort_with(SortOptions::new().with_order_descending(descending))
680 }
681
682 fn arg_sort(&self, options: SortOptions) -> IdxCa {
683 let bin = self.get_row_encoded(options).unwrap();
684 bin.arg_sort(Default::default())
685 }
686}
687
688impl ChunkSort<ListType> for ListChunked {
689 fn sort_with(&self, mut options: SortOptions) -> ListChunked {
690 options.multithreaded &= RAYON.current_num_threads() > 1;
691 let idx = self.arg_sort(options);
692 let mut out = unsafe { self.take_unchecked(&idx) };
693
694 let s = if options.descending {
695 IsSorted::Descending
696 } else {
697 IsSorted::Ascending
698 };
699 out.set_sorted_flag(s);
700 out
701 }
702
703 fn sort(&self, descending: bool) -> ListChunked {
704 self.sort_with(SortOptions::new().with_order_descending(descending))
705 }
706
707 fn arg_sort(&self, options: SortOptions) -> IdxCa {
708 let bin = _get_rows_encoded_ca(
709 self.name().clone(),
710 &[self.clone().into_column()],
711 &[options.descending],
712 &[options.nulls_last],
713 false,
714 )
715 .unwrap();
716 bin.arg_sort(Default::default())
717 }
718}
719
720impl ChunkSort<BooleanType> for BooleanChunked {
721 fn sort_with(&self, mut options: SortOptions) -> ChunkedArray<BooleanType> {
722 options.multithreaded &= RAYON.current_num_threads() > 1;
723 sort_with_fast_path!(self, options);
724 let mut bitmap = BitmapBuilder::with_capacity(self.len());
725 let mut validity =
726 (self.null_count() > 0).then(|| BitmapBuilder::with_capacity(self.len()));
727
728 if self.null_count() > 0 && !options.nulls_last {
729 bitmap.extend_constant(self.null_count(), false);
730 if let Some(validity) = &mut validity {
731 validity.extend_constant(self.null_count(), false);
732 }
733 }
734
735 let n_valid = self.len() - self.null_count();
736 let n_set = self.sum().unwrap() as usize;
737 if options.descending {
738 bitmap.extend_constant(n_set, true);
739 bitmap.extend_constant(n_valid - n_set, false);
740 } else {
741 bitmap.extend_constant(n_valid - n_set, false);
742 bitmap.extend_constant(n_set, true);
743 }
744 if let Some(validity) = &mut validity {
745 validity.extend_constant(n_valid, true);
746 }
747
748 if self.null_count() > 0 && options.nulls_last {
749 bitmap.extend_constant(self.null_count(), false);
750 if let Some(validity) = &mut validity {
751 validity.extend_constant(self.null_count(), false);
752 }
753 }
754
755 let mut ca = Self::from_chunk_iter(
756 self.name().clone(),
757 Some(BooleanArray::from_data_default(
758 bitmap.freeze(),
759 validity.map(|v| v.freeze()),
760 )),
761 );
762 ca.set_sorted_flag(if options.descending {
763 IsSorted::Descending
764 } else {
765 IsSorted::Ascending
766 });
767 ca
768 }
769
770 fn sort(&self, descending: bool) -> BooleanChunked {
771 self.sort_with(SortOptions {
772 descending,
773 nulls_last: false,
774 multithreaded: true,
775 maintain_order: false,
776 limit: None,
777 })
778 }
779
780 fn arg_sort(&self, options: SortOptions) -> IdxCa {
781 arg_sort_fast_path!(self, options);
782 if self.null_count() == 0 {
783 arg_sort::arg_sort_no_nulls(
784 self.name().clone(),
785 self.downcast_iter().map(|arr| arr.values_iter()),
786 options,
787 self.len(),
788 self.is_sorted_flag(),
789 )
790 } else {
791 arg_sort::arg_sort(
792 self.name().clone(),
793 self.downcast_iter().map(|arr| arr.iter()),
794 options,
795 self.null_count(),
796 self.len(),
797 self.is_sorted_flag(),
798 self.get(0).is_none(),
799 )
800 }
801 }
802 fn arg_sort_multiple(
803 &self,
804 by: &[Column],
805 options: &SortMultipleOptions,
806 ) -> PolarsResult<IdxCa> {
807 let mut vals = Vec::with_capacity(self.len());
808 let mut count: IdxSize = 0;
809 for arr in self.downcast_iter() {
810 vals.extend_trusted_len(arr.into_iter().map(|v| {
811 let i = count;
812 count += 1;
813 (i, v.map(|v| v as u8))
814 }));
815 }
816 arg_sort_multiple_impl(vals, by, options)
817 }
818}
819
820pub fn _broadcast_bools(n_cols: usize, values: &mut Vec<bool>) {
821 if n_cols > values.len() && values.len() == 1 {
822 while n_cols != values.len() {
823 values.push(values[0]);
824 }
825 }
826}
827
828pub fn arg_sort(columns: &[Column], mut sort_options: SortMultipleOptions) -> PolarsResult<IdxCa> {
831 assert!(!columns.is_empty());
832
833 for column in columns {
834 if column.dtype().is_object() {
835 polars_bail!(
836 InvalidOperation: "column '{}' has a dtype of '{}', which does not support sorting", column.name(), column.dtype()
837 )
838 }
839 }
840
841 if let [c] = columns {
842 Ok(c.arg_sort(SortOptions {
843 descending: sort_options.descending[0],
844 nulls_last: sort_options.nulls_last[0],
845 multithreaded: sort_options.multithreaded,
846 maintain_order: sort_options.maintain_order,
847 limit: sort_options.limit,
848 }))
849 } else if sort_options.nulls_last.iter().all(|&x| x)
850 || columns.iter().any(|c| c.dtype().is_nested())
851 || std::env::var("POLARS_ROW_FMT_SORT").is_ok()
852 {
853 argsort_multiple_row_fmt(
854 columns,
855 sort_options.descending,
856 sort_options.nulls_last,
857 sort_options.multithreaded,
858 )
859 } else {
860 let n_cols = columns.len();
861
862 _broadcast_bools(n_cols, &mut sort_options.descending);
863 _broadcast_bools(n_cols, &mut sort_options.nulls_last);
864
865 columns[0]
866 .clone()
867 .as_materialized_series()
868 .arg_sort_multiple(&columns[1..], &sort_options)
869 }
870}
871
872pub unsafe fn perfect_sort(idx: &[(IdxSize, IdxSize)], out: &mut Vec<IdxSize>) {
886 let chunk_size = std::cmp::max(
887 idx.len() / RAYON.current_num_threads(),
888 RAYON.current_num_threads(),
889 );
890
891 out.reserve(idx.len());
892 let ptr = out.as_mut_ptr() as *const IdxSize as usize;
893
894 RAYON.install(|| {
895 idx.par_chunks(chunk_size).for_each(|indices| {
896 let ptr = ptr as *mut IdxSize;
897 for (idx_val, idx_location) in indices {
898 unsafe { *ptr.add(*idx_location as usize) = *idx_val };
902 }
903 });
904 });
905 unsafe { out.set_len(idx.len()) };
908}
909
910#[cfg(test)]
911mod test {
912 use crate::prelude::*;
913 #[test]
914 fn test_arg_sort() {
915 let a = Int32Chunked::new(
916 PlSmallStr::from_static("a"),
917 &[
918 Some(1), Some(5), None, Some(1), None, Some(4), Some(3), Some(1), ],
927 );
928 let idx = a.arg_sort(SortOptions {
929 descending: false,
930 ..Default::default()
931 });
932 let idx = idx.cont_slice().unwrap();
933
934 let expected = [2, 4, 0, 3, 7, 6, 5, 1];
935 assert_eq!(idx, expected);
936
937 let idx = a.arg_sort(SortOptions {
938 descending: true,
939 ..Default::default()
940 });
941 let idx = idx.cont_slice().unwrap();
942 let expected = [2, 4, 1, 5, 6, 0, 3, 7];
944 assert_eq!(idx, expected);
945 }
946
947 #[test]
948 fn test_sort() {
949 let a = Int32Chunked::new(
950 PlSmallStr::from_static("a"),
951 &[
952 Some(1),
953 Some(5),
954 None,
955 Some(1),
956 None,
957 Some(4),
958 Some(3),
959 Some(1),
960 ],
961 );
962 let out = a.sort_with(SortOptions {
963 descending: false,
964 nulls_last: false,
965 multithreaded: true,
966 maintain_order: false,
967 limit: None,
968 });
969 assert_eq!(
970 Vec::from(&out),
971 &[
972 None,
973 None,
974 Some(1),
975 Some(1),
976 Some(1),
977 Some(3),
978 Some(4),
979 Some(5)
980 ]
981 );
982 let out = a.sort_with(SortOptions {
983 descending: false,
984 nulls_last: true,
985 multithreaded: true,
986 maintain_order: false,
987 limit: None,
988 });
989 assert_eq!(
990 Vec::from(&out),
991 &[
992 Some(1),
993 Some(1),
994 Some(1),
995 Some(3),
996 Some(4),
997 Some(5),
998 None,
999 None
1000 ]
1001 );
1002 let b = BooleanChunked::new(
1003 PlSmallStr::from_static("b"),
1004 &[Some(false), Some(true), Some(false)],
1005 );
1006 let out = b.sort_with(SortOptions::default().with_order_descending(true));
1007 assert_eq!(Vec::from(&out), &[Some(true), Some(false), Some(false)]);
1008 let out = b.sort_with(SortOptions::default().with_order_descending(false));
1009 assert_eq!(Vec::from(&out), &[Some(false), Some(false), Some(true)]);
1010 }
1011
1012 #[test]
1013 #[cfg_attr(miri, ignore)]
1014 fn test_arg_sort_multiple() -> PolarsResult<()> {
1015 let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 2, 1, 1, 3, 4, 3, 3]);
1016 let b = Int64Chunked::new(PlSmallStr::from_static("b"), &[0, 1, 2, 3, 4, 5, 6, 1]);
1017 let c = StringChunked::new(
1018 PlSmallStr::from_static("c"),
1019 &["a", "b", "c", "d", "e", "f", "g", "h"],
1020 );
1021 let df = DataFrame::new_infer_height(vec![
1022 a.into_series().into(),
1023 b.into_series().into(),
1024 c.into_series().into(),
1025 ])?;
1026
1027 let out = df.sort(["a", "b", "c"], SortMultipleOptions::default())?;
1028 assert_eq!(
1029 Vec::from(out.column("b")?.as_series().unwrap().i64()?),
1030 &[
1031 Some(0),
1032 Some(2),
1033 Some(3),
1034 Some(1),
1035 Some(1),
1036 Some(4),
1037 Some(6),
1038 Some(5)
1039 ]
1040 );
1041
1042 let a = StringChunked::new(
1044 PlSmallStr::from_static("a"),
1045 &["a", "b", "c", "a", "b", "c"],
1046 )
1047 .into_series();
1048 let b = Int32Chunked::new(PlSmallStr::from_static("b"), &[5, 4, 2, 3, 4, 5]).into_series();
1049 let df = DataFrame::new_infer_height(vec![a.into(), b.into()])?;
1050
1051 let out = df.sort(["a", "b"], SortMultipleOptions::default())?;
1052 let expected = df!(
1053 "a" => ["a", "a", "b", "b", "c", "c"],
1054 "b" => [3, 5, 4, 4, 2, 5]
1055 )?;
1056 assert!(out.equals(&expected));
1057
1058 let df = df!(
1059 "groups" => [1, 2, 3],
1060 "values" => ["a", "a", "b"]
1061 )?;
1062
1063 let out = df.sort(
1064 ["groups", "values"],
1065 SortMultipleOptions::default().with_order_descending_multi([true, false]),
1066 )?;
1067 let expected = df!(
1068 "groups" => [3, 2, 1],
1069 "values" => ["b", "a", "a"]
1070 )?;
1071 assert!(out.equals(&expected));
1072
1073 let out = df.sort(
1074 ["values", "groups"],
1075 SortMultipleOptions::default().with_order_descending_multi([false, true]),
1076 )?;
1077 let expected = df!(
1078 "groups" => [2, 1, 3],
1079 "values" => ["a", "a", "b"]
1080 )?;
1081 assert!(out.equals(&expected));
1082
1083 Ok(())
1084 }
1085
1086 #[test]
1087 fn test_sort_string() {
1088 let ca = StringChunked::new(
1089 PlSmallStr::from_static("a"),
1090 &[Some("a"), None, Some("c"), None, Some("b")],
1091 );
1092 let out = ca.sort_with(SortOptions {
1093 descending: false,
1094 nulls_last: false,
1095 multithreaded: true,
1096 maintain_order: false,
1097 limit: None,
1098 });
1099 let expected = &[None, None, Some("a"), Some("b"), Some("c")];
1100 assert_eq!(Vec::from(&out), expected);
1101
1102 let out = ca.sort_with(SortOptions {
1103 descending: true,
1104 nulls_last: false,
1105 multithreaded: true,
1106 maintain_order: false,
1107 limit: None,
1108 });
1109
1110 let expected = &[None, None, Some("c"), Some("b"), Some("a")];
1111 assert_eq!(Vec::from(&out), expected);
1112
1113 let out = ca.sort_with(SortOptions {
1114 descending: false,
1115 nulls_last: true,
1116 multithreaded: true,
1117 maintain_order: false,
1118 limit: None,
1119 });
1120 let expected = &[Some("a"), Some("b"), Some("c"), None, None];
1121 assert_eq!(Vec::from(&out), expected);
1122
1123 let out = ca.sort_with(SortOptions {
1124 descending: true,
1125 nulls_last: true,
1126 multithreaded: true,
1127 maintain_order: false,
1128 limit: None,
1129 });
1130 let expected = &[Some("c"), Some("b"), Some("a"), None, None];
1131 assert_eq!(Vec::from(&out), expected);
1132
1133 let ca = StringChunked::new(
1135 PlSmallStr::from_static("a"),
1136 &[Some("a"), Some("c"), Some("b")],
1137 );
1138 let out = ca.sort(false);
1139 let expected = &[Some("a"), Some("b"), Some("c")];
1140 assert_eq!(Vec::from(&out), expected);
1141
1142 let out = ca.sort(true);
1143 let expected = &[Some("c"), Some("b"), Some("a")];
1144 assert_eq!(Vec::from(&out), expected);
1145 }
1146}