前端 Array.sort() 源码学习

array,sort · 浏览次数 : 0

小编点评

这段 V8 引擎的 Array.sort() 方法的源码主要使用了快速排序算法（Quick Sort）和插入排序算法（Insertion Sort），用于对数组进行排序。 1. 首先，检查传入的比较函数是否存在，如果不存在，则使用默认的比较函数。 2. 然后，根据数组长度的不同，采用不同的排序算法： - 如果数组长度小于等于 22，则使用插入排序算法。 - 否则，使用快速排序算法。 3. 在快速排序算法中，当数组长度小于等于 10 时，切换到插入排序算法。 4. 对于非数组对象，如果原型链上有暴露的元素，需要进行 shadow 处理。 5. 最后，处理数组中的索引访问器，将 hole 和 undefined 移动到数组末尾，并更新数组长度。总体来说，这段源码实现了对数组的快速和高效排序。

正文

源码地址

V8源码Array
710行开始为sort()相关

Array.sort()方法是那种排序呢？

去看源码主要是源于这个问题

// In-place QuickSort algorithm.
// For short (length <= 22) arrays, insertion sort is used for efficiency.
复制

源码中的第一句话就回答了我的问题

// 通常使用快速排序算法
// 如果数组长度小于23，则插入排序效率更好
复制

既然都打开了，索性就看看源码叭，看看sort到底做了些啥
我把一整坨源码码分成一块一块来看，让自己比较清晰的知道sort到底干了些啥，下面是阅读代码时，自己的思路梳理

第一块代码

if (!IS_CALLABLE(comparefn)) {
  comparefn = function (x, y) {
    if (x === y) return 0;
    if (%_IsSmi(x) && %_IsSmi(y)) {
      return %SmiLexicographicCompare(x, y);
    }
    x = TO_STRING(x);
    y = TO_STRING(y);
    if (x == y) return 0;
    else return x < y ? -1 : 1;
  };
}
复制

第一块内容判断，如果传进来的参数不可回调，则给一个默认的回调函数
这个回调函数，判断俩值是不是Smi

// Smi：小整数(Small integers)V8引擎中的元素类型之一
`https://medium.com/@justjavac/v8-internals-how-small-is-a-small-integer-ba5e17a3ae5f`
// PS: markdown语法有问题，这里直接贴出 url
复制

如果是则进行小整数字典序比较
什么是字典序

否则将两个值转换成字符串进行字符串比较大小
字符串如何比较大小

第二块代码

var InsertionSort = function InsertionSort(a, from, to) {
  ...
};
var QuickSort = function QuickSort(a, from, to) {
  if (to - from <= 10) {
    InsertionSort(a, from, to);
    return;
  }
  ...
};
复制

第二块就是正常的快速排序和插入排序
这里采取的是数量小于10的数组使用 InsertionSort(插入)，比10大的数组则使用 QuickSort(快速)。

第三块代码

if (!is_array) {
  // For compatibility with JSC, we also sort elements inherited from
  // the prototype chain on non-Array objects.
  // We do this by copying them to this object and sorting only
  // own elements. This is not very efficient, but sorting with
  // inherited elements happens very, very rarely, if at all.
  // The specification allows "implementation dependent" behavior
  // if an element on the prototype chain has an element that
  // might interact with sorting.
  max_prototype_element = CopyFromPrototype(array, length);
}
复制

这块代码里面的注释，讲的还是比较详细的，百度翻译也非常nice

// 为了与JSC兼容，我们还在非数组对象上对从原型链继承的元素进行排序。
// 我们通过将它们复制到这个对象并只对自己的元素排序来实现这一点。
// 这不是很有效，但是使用继承的元素进行排序的情况很少发生，如果有的话。
// 如果原型链上的元素具有可能与排序交互的元素，则规范允许“依赖于实现”的行为。
复制

第四块代码

// Copy elements in the range 0..length from obj's prototype chain
// to obj itself, if obj has holes. Return one more than the maximal index
// of a prototype property.
var CopyFromPrototype = function CopyFromPrototype(obj, length) {
  var max = 0;
  for (var proto = %object_get_prototype_of(obj); 
        proto;
        proto = %object_get_prototype_of(proto)) {
        var indices = IS_PROXY(proto) ? length : %GetArrayKeys(proto, length);
        if (IS_NUMBER(indices)) {
            // It's an interval.
            var proto_length = indices;
            for (var i = 0; i < proto_length; i++) {
                if (!HAS_OWN_PROPERTY(obj, i) && HAS_OWN_PROPERTY(proto, i)) {
                obj[i] = proto[i];
                if (i >= max) { max = i + 1; }
            }
        }
        } 
        else {
            for (var i = 0; i < indices.length; i++) {
                var index = indices[i];
                if (!HAS_OWN_PROPERTY(obj, index) && HAS_OWN_PROPERTY(proto, index)) {
                    obj[index] = proto[index];
                    if (index >= max) { max = index + 1; }
                }
            }
        }
    }
    return max;
};
复制

这块代码是对于非数组的一个处理
注释里面说到

// 如果obj有holes(能猜出大概意思，不咋好翻译这个hole)
// 就把obj原型链上0-length所有元素赋值给obj本身
// 返回一个max，max是比原型属性索引最大值+1
复制

返回的max会在下面用到

第五块代码

if (!is_array && (num_non_undefined + 1 < max_prototype_element)) {
  // For compatibility with JSC, we shadow any elements in the prototype
  // chain that has become exposed by sort moving a hole to its position.
  ShadowPrototypeElements(array, num_non_undefined, max_prototype_element);
}
复制

注释翻译：

// 为了与JSC兼容
// 我们对原型链中通过sort将一个hole移动到其位置而暴露的所有元素
// 进行shadow处理。
复制

可能因为英语语法水平不够，单看注释还有点不明白
大致意思是，把“掀开的东西，再盖上”
直接看下面一块代码，看看这个shadow操作到底干了啥叭

第六块代码

// Set a value of "undefined" on all indices in the range from..to
// where a prototype of obj has an element. I.e., shadow all prototype
// elements in that range.
var ShadowPrototypeElements = function(obj, from, to) {
  for (var proto = %object_get_prototype_of(obj); proto;
        proto = %object_get_prototype_of(proto)) {
        var indices = IS_PROXY(proto) ? to : %GetArrayKeys(proto, to);
        if (IS_NUMBER(indices)) {
          // It's an interval.
          var proto_length = indices;
          for (var i = from; i < proto_length; i++) {
            if (HAS_OWN_PROPERTY(proto, i)) {
              obj[i] = UNDEFINED;
            }
          }
        } 
        else {
            for (var i = 0; i < indices.length; i++) {
                var index = indices[i];
                if (from <= index && HAS_OWN_PROPERTY(proto, index)) {
                    obj[index] = UNDEFINED;
                }
            }
        }
    }
};
复制

这块代码就是shadow操作，注释翻译如下：

// 在范围从..到obj原型包含元素的所有索引上设置一个“undefined”值。
// 换句话说
// 在该范围内对所有原型元素进行shadow处理。
复制

其中：
I.e.是拉丁文id est 的缩写，它的意思就是“那就是说，换句话说”
英文不够你用了是不你还要写拉丁文？！

果然大致的意思猜的没错
在刚刚把对象的原型属性的复制，现在要设置undefined来shadow他了

第七块代码

if (num_non_undefined == -1) {
  // There were indexed accessors in the array.
  // Move array holes and undefineds to the end using a Javascript function
  // that is safe in the presence of accessors.
  num_non_undefined = SafeRemoveArrayHoles(array);
}
复制

意思是数组中有索引访问器。使用JS函数将数组hole和未定义项移到末尾，该函数在访问器存在时是安全的。
下面是安全移出数组hole方法

var SafeRemoveArrayHoles = function SafeRemoveArrayHoles(obj) {
  // Copy defined elements from the end to fill in all holes and undefineds
  // in the beginning of the array.  Write undefineds and holes at the end
  // after loop is finished.
    var first_undefined = 0;
    var last_defined = length - 1;
    var num_holes = 0;
    while (first_undefined < last_defined) {
        // Find first undefined element.
        while (first_undefined < last_defined &&
            !IS_UNDEFINED(obj[first_undefined])) {
            first_undefined++;
        }
        // Maintain the invariant num_holes = the number of holes in the original
        // array with indices <= first_undefined or > last_defined.
        if (!HAS_OWN_PROPERTY(obj, first_undefined)) {
          num_holes++;
        }
        // Find last defined element.
        while (first_undefined < last_defined &&
            IS_UNDEFINED(obj[last_defined])) {
            if (!HAS_OWN_PROPERTY(obj, last_defined)) {
                num_holes++;
            }
            last_defined--;
        }
        if (first_undefined < last_defined) {
            // Fill in hole or undefined.
            obj[first_undefined] = obj[last_defined];
            obj[last_defined] = UNDEFINED;
        }
    }
    // If there were any undefineds in the entire array, first_undefined
    // points to one past the last defined element.  Make this true if
    // there were no undefineds, as well, so that first_undefined == number
    // of defined elements.
    if (!IS_UNDEFINED(obj[first_undefined])) first_undefined++;
    // Fill in the undefineds and the holes.  There may be a hole where
    // an undefined should be and vice versa.
    var i;
    for (i = first_undefined; i < length - num_holes; i++) {
        obj[i] = UNDEFINED;
    }
    for (i = length - num_holes; i < length; i++) {
        // For compatability with Webkit, do not expose elements in the prototype.
        if (i in %object_get_prototype_of(obj)) {
            obj[i] = UNDEFINED;
        } else {
            delete obj[i];
        }
    }
    // Return the number of defined elements.
    return first_undefined;
};
复制

还会判断数组长度

if (length < 2) return array;
复制