The sorting principle of Array.prototype.sort

The sorting principle of Array.prototype.sort

Has been updating recently

Typescript
Data structure and algorithm series, in learning
JS
of
sort
The method generates curiosity,
Array.prototype.sort()
The usage of is definitely familiar:

arr.sort([compareFunction]); copy the code

sort
The parameter of the method is an optional sort callback function
compareFunction
, There are two parameters for comparison in the callback function. If omitted, the elements are in accordance with the characters of the converted string
Unicode
The sites are sorted. entire
sort
The method will return the sorted array, but the original array has been sorted in place.

But this

Array.prototype.sort()
What is the logic of, what is the sorting method used, and the time and space complexity are not clear. This article starts from
ECMA262
standard,
V8
Engine source code to explore
sort
method.

ECMA262

Since you want to sort, you will definitely use the size comparison, in

ECMA262
The definition of the relationship between size in the translation is as follows:

Comparison of size relationship

Compare

x <y
(among them
x
with
y
Is the value) will produce
true``, false
or
undefined
(Indicates that at least one operand is
NaN
). except
x
with
y
In addition, the algorithm also uses the name
LeftFirst
The boolean flag as a parameter. This flag is used to control the
x
with
y
The order in which operations with potential side effects are performed. This is necessary because
ECMAScript
Specify the left-to-right calculation of the expression.
LeftFirst
The default value is
true
, Which means first
x
Parameter position calculation expression. in case
LeftFirst
for
false
, The situation is the opposite, you must now
y
Execute the operation on. The comparison is performed as follows:

  1. in case
    LeftFirst
    Marked as
    true
    , Then
    1. yield
      px
      for
      ?ToPrimitive(x, number)
      .
    2. yield
      py
      for
      ?ToPrimitive(y, number)
      .
  2. otherwise,
    1. yield
      py
      for
      ?ToPrimitive(y, number)
      .
    2. yield
      px
      for
      ?ToPrimitive(x, number)
      .
  3. in case
    Type(px)
    for
    String
    and
    Type(py)
    for
    String
    , Then
    1. in case
      IsStringPrefix(py, px)
      for
      true
      , Return
      false
      .
    2. in case
      IsStringPrefix(px, py)
      for
      true
      , Return
      true
      .
    3. make
      k
      Is the smallest non-negative integer, this non-negative integer can make
      px
      on
      k
      The code unit at the index is the same as
      py
      on
      k
      The code unit at the index is different.
    4. yield
      m
      for
      px
      on
      k
      The integer value of the code unit at the index.
    5. yield
      n
      for
      py
      on
      k
      The integer value of the code unit at the index.
    6. in case
      m <n
      , Return
      true
      . Otherwise, return
      false
      .
  4. otherwise,
    1. in case
      Type(px)
      for
      BigInt
      and
      Type(py)
      for
      String
      , Then
      1. yield
        ny
        for
        !StringToBigInt(py)
        .
      2. in case
        ny
        for
        NaN
        , Return
        undefined
        .
      3. return
        BigInt::lessThan(px, ny)
        .
    2. in case
      Type(px)
      for
      String
      and
      Type(py)
      for
      BigInt
      , Then
      1. yield
        nx
        for
        !StringToBigInt(px)
        .
      2. in case
        nx
        for
        NaN
        , Return
        undefined
        .
      3. return
        BigInt::lessThan(nx, py)
        .
    3. yield
      nx
      for
      !ToNumeric(px)
      .
    4. yield
      ny
      for
      !ToNumeric(py)
      .
    5. in case
      Type(nx)
      versus
      Type(ny)
      Same, return
      Type(nx)::lessThan(nx, ny)
      .
    6. assertion:
      Type(nx)
      for
      BigInt
      and
      Type(ny)
      for
      Number
      , Or
      Type(nx)
      for
      Number
      and
      Type(ny)
      for
      BigInt
      .
    7. in case
      nx
      or
      ny
      for
      NaN
      , Return
      undefined
      .
    8. in case
      nx
      for
      -
      or
      ny
      for
      +
      , Return
      true
      .
    9. in case
      nx
      for
      +
      or
      ny
      for
      -
      , Return
      false
      .
    10. in case
      (nx) < (ny)
      , Return
      true
      ; Otherwise return
      false
      .

Unicode
Unicode
UTF-16

x < y

  1. x
    y
    String
    1. x
      y
      y
      x
    2. x
      y
  2. x
    y
    NaN
    undefined
    undefined
    false

JS
UTF-16
16
emoji
2
16
2

ECMA262

/** * */ 'lin' < 'linjingyi'; //x y true 3.2 'linjingyi' < 'lin'; //y x false 3.1 /** * */ 'm' < 'n'; // 006d < 006e true 3.6 k 0 'm' < 'l'; // 006d < 006c false 3.6 k 0 ' ' < ' '; // 4f60 < 6211 true 3.6 k 0 'ml' < 'mn'; // 006d 006c < 006d 006e true 3.6 k 1 ' ' < ' '; // 4f60 597d < 6211 597d true 3.6 k 1 ' ' < ' '; // 4f60 597d < 6211 true 3.6 k 1 '10' < '2'; // 0031 0030 < 0032 true 3.6 k 0 ' ' < ' '; //emoji d83c df4e < d83d de19 true 3.6 k 0 ' ' < ' '; // d83d de0d < d83d de19 true 3.6 k 1 /** * */ '10' < 2; //10 10 10 < 2 false 4.1.3 'm' < 2 //m NaN NaN < 2 false 4.1.2 /** * ( ) * undefined -> NaN * null -> 0 * true -> 1 * false -> 0 * String -> NaN * Object -> toString String -> NaN */ null < 1 //0 < 1 true 4.5 false < 1 //0 < 1 true 4.5 undefined < 1 //NaN < 1 false 4.7 true < 2 //1 < 2, true 4.5 true < '2' //1 < 2 true 4.5 null < true //0 < 1 true 4.5 ({}) < ([]) //'[object Object]' < '' false 4.5 ({}) < ([1 2 3]) //'[object Object]' < '1 2 3' false 4.5 ({}) < ([' ' 2 3]) //'[object Object]' < ' 2 3' '[' < ' ' 005b < 4f60 true 4.5

ECMA262
Array.prototype.sort()

Array.prototype.sort(comparefn)

comparefn
undefined
x
y
x < y
x > y
0

sort

  1. comparefn
    undefined
    IsCallable(comparefn)
    false
    ,
    TypeError
    .
  2. obj
    ?ToObject(this value)
    .
  3. len
    ?LengthOfArrayLike(obj)
    .
  4. items
    List
    .
  5. k
    0
    .
  6. , while
    k < len
    ,
    1. Pk
      !ToString( (k))
      .
    2. kPresent
      ?HasProperty(obj, Pk)
      .
    3. kPresent
      true
      ,
      1. kValue
        ?Get(obj, Pk)
        .
      2. kValue
        items
        .
    4. k
      k + 1
      .
  7. itemCount
    items
    .
  8. SortCompare
    items
    SortCompare
  9. j
    0
    .
  10. , while
    j < itemCount
    ,
    1. ?Set(obj, !ToString( (j)), items[j], true)
      .
    2. j
      j + 1
      .
  11. , while
    j < len
    ,
    1. ?DeletePropertyOrThrow(obj, !ToString( (j)))
      .
    2. j
      j + 1
      .
  12. obj
    .

len
obj
sort

  • comparefn
    undefined
    items

  • comparefn
    undefined
    SortCompare

  • comparefn
    undefined
    ToString
    SortCompare

8
items

  • itemCount
    itemCount
    j
    old[j]
    new[ (j)]
  • itemCount
    j
    and
    k
    ,
    SortCompare(old[j], old[k]) < 0
    ,
    (j) < (k)
    .

old[j]
8
items[j]
new[j]
8
items[j]

comparefn

a
,
b
c
s
a <CF b
comparefn(a, b) < 0
;
a =CF b
comparefn(a, b) = 0
;
a >CF b
comparefn(a, b) > 0

  • a
    b
    comparefn(a, b)
    v
    ,
    Type(v)
    Number
    ,
    v
    NaN
    a
    b
    a <CF b
    ,
    a =CF b
    a >CF b
  • comparefn(a, b)
    obj
    obj
    .
  • a =CF a
    ( )
  • a =CF b
    ,
    b =CF a
    ( )
  • a =CF b
    b =CF c
    ,
    a =CF c
    (
    =CF
    )
  • a <CF b
    b <CF c
    ,
    a <CF c
    (
    <CF
    )
  • a >CF b
    b >CF c
    ,
    a >CF c
    (
    >CF
    )

NOTE 1

comparefn
S

NOTE 2

sort
this
Array

SortCompare(x, y)

SortCompare
x
y
sort
comparefn

  1. x
    y
    undefined
    ,
    +0
    .
  2. x
    undefined
    ,
    1
    .
  3. y
    undefined
    ,
    -1
    .
  4. comparefn
    undefined
    ,
    1. v
      ?ToNumber(?Call(comparefn, undefined, x, y ))
      .
    2. v
      NaN
      ,
      +0
      .
    3. v
      .
  5. xString
    ?ToString(x)
    .
  6. yString
    ?ToString(y)
    .
  7. xSmaller
    Abstract Relational Comparison
    xString < yString
    .
  8. xSmaller
    true
    ,
    -1
    .
  9. ySmaller
    Abstract Relational Comparison
    yString < xString
    .
  10. ySmaller
    true
    ,
    1
    .
  11. +0
    .

NOTE 1

undefined
undefined
undefined

NOTE 2

5
6
ToString
SortCompare

sort

ECMA262

SortCompare(x, y)
x
y

  • x
    undefined
    1
    y
    undefined
    -1
    undefined
    +0
    comparefn
  • comparefn
    NaN
    +0
  • comparefn
    x
    y
    x
    1
    y
    -1
    +0

Array.prototype.sort(comparefn)

  • sort
  • sort
  • sort
    JS
  • sort
  • comparefn

undefined
undefined

V8

ECMA262
sort
V8

V8
tq
sort
1400
timsort
Tim sort, the fastest sort used in v8 and Python

Tim sort, the fastest sort used in v8 and Python

Tim Sort
2002
Python Tim Peters Python
list
O(n)
O(nlogn)
O(n )

list
runs

runs

2
run
Tim
run
32 64

run
runs
min run

:

  1. item
    descending flag
    item
    false
  2. items
  3. run
    run
  4. run
    min run
    items

js
:

let nremaining = list.length; const runs = []; const minrun = merge_compute_minrun(nremaining); function count_run(list) { let descending = false; let n = 1; if (list.length === 1) return n; if (list[n] < list[n - 1]) { descending = true; } n++; while (n < list.length && (list[n] < list[n - 1] || !desending)) { n++; } return { size: n, descending, }; } do { const startIndex = list.length - nremaining; let { size, descending } = count_run(); if (descending) reverse( list, //start index startIndex, //length to revert size ); if (n < minrun) { const force = nremaining <= minrun ? nremaining : minrun; binarysort( list, //sort start index startIndex, //sort length force, //index to start binary search list.length - nremaining + size ); size = force; } nremaining -= n; runs.push({ startIndex, size }); } while (nremaining);

2
runs
Tim
2
64
min run
:

function merge_compute_minrun(n) { var r = 0; /* becomes 1 if any 1 bits are shifted off */ while (n >= 64) { r |= n & 1; n >>= 1; } return n + r; }

2
runs
list
Tim

runs

3
runs
:

  • A: 1000
  • B: 2000
  • C: 1000

A
C
A B C
A
C
B

Tim sort

2
runs

3
runs
:

A > B + C B > C

B
C
runs
runs
A

runs

gallop

runs
:

  • A: [0, 2, 4, 6, 8, 10, ..., 100]
  • B: [57, 77, 97, ...]

B - 57
A - 0
57
58/2 = 29
B - 77
(78-58)/2 = 10

Tim sort

gallop
gallop

2

A[7]
A[7]
A[9]
A[11]
A[15]
A[23]
A[35]
...
A[35]
A[23]
A[35]

:

function gallop_left( key, //the data to merge a, //the run to merge to startIndex ) { let lastofs = 0; //starting point for binary search let ofs = 1; //end point for binary search const n = a.length; if (a[hint] < key) { //compare from left to right const maxofs = n - hint; while (ofs < maxofs && a[ofs] < key) { lastofs = ofs; ofs = (ofs << 1) + 1; } if (ofs > maxofs) ofs = maxofs; lastofs += hint; ofs += hint; } else { //compare from right to left const maxofs = hint + 1; while (ofs < maxofs && a[hint - ofs] < key) { lastofs = ofs; ofs = (ofs << 1) + 1; } if (ofs > maxofs) ofs = maxofs; let tmp = lastofs; lastofs = hint - ofs; ofs = hint - tmp; } //do binary search at the end ++lastofs; while (lastofs < ofs) { let m = lastofs + ((ofs - lastofs) >> 1); if (a[m] < key) lastofs = m + 1; else ofs = m; } return ofs; //return the found index }

gallop

runs
:

  • A: [0, 2, 4, 6, 8, 10, ..., 100]
  • B: [57, 77, 97, ...]

gallop_left(b[0], a, 0)
B[0]
A
gallop_right(a[a.length - 1], b, b.length - 1)
A[0]
B

gallop_left
gallop_right

:

function merge(ms /* MergeState */, a, b) { let k = gallop_right(b[0], a, 0); if (k < 0) throw -1; let merged = a.splice(0, k); if (a.length == 0) return merged.concat(b); else merged.push(b.shift()); let nb = gallop_left(a[a.length - 1], b, b.length - 1); if (nb <= 0) throw nb; const lastPart = b.splice(nb); lastPart.unshift(a.pop()); if (a.length <= nb) merged = merged.concat(merge_lo(ms, a, b)); else merged = merged.concat(merge_hi(ms, a, b)); return merged.concat(lastPart); }

Gallop
runs
Tim sort
Gallop

Python

7
Gallop
runs
Gallop
7
Gallop

:

const MIN_GALLOP = 7; function merge_lo( ms, //MergeState a, //the shorter run b //the longer run ) { let min_gallop = ms.min_gallop; let merged = []; for (;;) { let acount = 0; /* # of times A won in a row */ let bcount = 0; /* # of times B won in a row */ //do sequential comparing first, util it fulfils the galloping constrain do { if (b[0] < a[0]) { merged.push(b.shift()); ++bcount; acount = 0; if (b.length == 0) return merged.concat(a); } else { merged.push(a.shift()); ++acount; bcount = 0; if (a.length == 0) return merged.concat(b); } } while ((acount || bcount) >= min_gallop)) //galloping and merging ++min_gallop; do { ms.min_gallop = min_gallop -= min_gallop > 1; let k = gallop_right(b[0], a, 0); acount = k; if (k) { if (k < 0) return -1; merged = merged.concat(a.splice(0, k)); if (a.length == 0) return merged.concat(b); } merged.push(b.shift()); if (b.length == 0) return merged.concat(a); k = gallop_left(a[0], b, 0); bcount = k; if (k) { if (k < 0) return -1; merged = merged.concat(b.splice(0, k)); if (b.length == 0) return merged.concat(a); } merged.push(a.shift()); if (a.length == 0) return merged.concat(b); } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); ms.min_gallop = ++min_gallop; } return merged; }

merge_hi
merge_lo

ECMA262
sort
V8
Array.prototype.sort

  1. ECMA262
    Array.prototype.sort
    SortCompare
    SortCompare
    sort
  2. Array.prototype.sort
    python Tim
    Tim sort
  3. O(n)
    O(nlogn)
    O(nlogn)
    O(n )
    O(n)