parseInt(string, radix)

在使用过程中发现一个问题,看下面的测试代码结果:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
// A do-nothing function; used below as the "function" sample input.
const noop = () => {};

// Sample inputs of assorted types. The trailing comment on each entry is its
// index in the array, which is also its index in `result`.
var vals = [
  1, // 0
  '1', // 1
  NaN, // 2
  null, // 3
  {}, // 4
  false, // 5
  true, // 6
  { length: 2, 0: 'a', 1: 'b' }, // 7
  { a: 1 }, // 8
  [], // 9
  [1, 2], // 10
  noop, // 11
  undefined, // 12
  void 0, // 13
];

// Before running this: what do you expect each entry to parse to?
const result = [];
for (const v of vals) {
  // Every sample is parsed as a base-10 integer.
  result.push(parseInt(v, 10));
}
console.log('\n', result);
console.log('[1,2] => ', result[10]);

 [
    1,   1, NaN, NaN, NaN,
  NaN, NaN, NaN, NaN, NaN,
    1, NaN, NaN, NaN
]
[1,2] =>  1
undefined

发现问题没?疑惑点就在于 parseInt([1, 2], 10)(即上面索引为 10 的那一项)的结果居然是 1?

parseInt 实现伪码:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
/**
 * ToString as used by the spec: a template literal converts with the
 * "string" hint (toString() before valueOf()) and throws a TypeError for
 * Symbols, whereas `"" + s` would use the "default" hint.
 */
const ToString = (s) => `${s}`;

/** Strips surrounding whitespace; only the leading part matters for parsing. */
const TrimString = (s) => (s || "").trim();

/**
 * ToInt32: `| 0` applies ToNumber and then wraps/truncates to a signed
 * 32-bit integer, so NaN, undefined and fractions such as 0.5 become 0.
 */
const ToInt32 = (v) => v | 0;

const log = console.log;

/**
 * Maps one character to its digit value (0-35), or -1 when it is not an
 * ASCII digit or letter.
 *
 * Char codes are compared directly instead of lower-casing the character,
 * because toLowerCase() folds some non-ASCII characters (for example the
 * Kelvin sign U+212A) onto ASCII letters.
 *
 * @param {string} ch - A single UTF-16 code unit.
 * @returns {number} The digit value, or -1 if `ch` is not alphanumeric.
 */
function digitValue(ch) {
  const code = ch.charCodeAt(0);
  if (code >= 48 && code <= 57) return code - 48; // '0'..'9'
  if (code >= 97 && code <= 122) return code - 87; // 'a'..'z' -> 10..35
  if (code >= 65 && code <= 90) return code - 55; // 'A'..'Z' -> 10..35
  return -1;
}

/**
 * Re-implementation of the global parseInt, following the steps of the
 * ECMAScript specification.
 *
 * @param {*} string - Value to parse; it is converted to a string first,
 *   which is why parseInt([1, 2]) parses "1,2" and yields 1.
 * @param {*} [radix] - Base in the range 2..36. A value that converts to 0
 *   (including undefined and NaN) means "base 10, or base 16 when the input
 *   starts with 0x/0X".
 * @returns {number} The parsed integer, or NaN when the radix is out of
 *   range or the input does not start with a valid digit.
 */
function parseInt(string, radix) {
  // The input is stringified before anything else happens.
  const inputString = ToString(string);
  let S = TrimString(inputString);

  // Consume one optional leading sign.
  let sign = 1;
  const first = S[0];
  if (first === "-") {
    sign = -1;
  }
  if (first === "-" || first === "+") {
    S = S.slice(1);
  }

  // Resolve the radix.
  let R = ToInt32(radix);
  let stripPrefix = true;
  if (R !== 0) {
    // Only bases 2..36 are supported.
    if (R < 2 || R > 36) return NaN;
    if (R !== 16) {
      stripPrefix = false;
    }
  } else {
    // A radix of 0 defaults to base 10.
    R = 10;
  }

  // A "0x"/"0X" prefix is dropped (and selects base 16) even when nothing
  // follows it, hence `>= 2`; "0x" alone then parses to NaN.
  if (stripPrefix && S.length >= 2 && S.slice(0, 2).toLowerCase() === "0x") {
    S = S.slice(2);
    R = 16;
  }

  // `end` is the index of the first character that is not a digit in base R,
  // so S.slice(0, end) is exactly the run of valid leading digits.
  let end = 0;
  while (end < S.length) {
    const digit = digitValue(S[end]);
    if (digit < 0 || digit >= R) break;
    end++;
  }
  const Z = S.slice(0, end);
  if (!Z) return NaN;

  const mathInt = parseWithRadix(Z, R);

  // Keep the sign of zero: "-0" parses to -0.
  if (mathInt === 0) {
    return sign === -1 ? -0 : +0;
  }
  return sign * mathInt;
}

/**
 * Converts a string of digits written in base `radix` to a number.
 *
 * The value is accumulated with ordinary floating-point arithmetic, so
 * results above Number.MAX_SAFE_INTEGER are approximate.
 *
 * @param {string} Z - Digits 0-9 / a-z (either case), without sign or prefix.
 * @param {number} [radix=10] - Base of the digits.
 * @returns {number} The value of `Z`; 0 for an empty string; NaN if any
 *   character is not a valid digit in `radix`.
 */
function parseWithRadix(Z, radix = 10) {
  let mathInt = 0;
  for (let i = 0; i < Z.length; i++) {
    const digit = digitValue(Z[i]);
    if (digit < 0 || digit >= radix) return NaN;
    // Horner's scheme: shift what we have by one digit and add the next.
    mathInt = mathInt * radix + digit;
  }
  return mathInt;
}

/**
 * Logs what parseInt returns for `val` under every radix from 0 to 39,
 * which covers both the valid range and the out-of-range values on each side.
 *
 * @param {*} val - Input passed unchanged to parseInt.
 */
function test(val) {
  const result = Array.from(
    { length: 40 },
    (_, radix) => `R: ${radix}, V: ${parseInt(val, radix)}`
  );
  log("\n", result);
}

test("EEE");
log(parseInt("0xEEE", 16));
log(parseInt("111111", 2));
test("-20AFE");

 [
  'R: 0, V: NaN',    'R: 1, V: NaN',    'R: 2, V: NaN',
  'R: 3, V: NaN',    'R: 4, V: NaN',    'R: 5, V: NaN',
  'R: 6, V: NaN',    'R: 7, V: NaN',    'R: 8, V: NaN',
  'R: 9, V: NaN',    'R: 10, V: NaN',   'R: 11, V: NaN',
  'R: 12, V: NaN',   'R: 13, V: NaN',   'R: 14, V: NaN',
  'R: 15, V: 3374',  'R: 16, V: 3822',  'R: 17, V: 4298',
  'R: 18, V: 4802',  'R: 19, V: 5334',  'R: 20, V: 5894',
  'R: 21, V: 6482',  'R: 22, V: 7098',  'R: 23, V: 7742',
  'R: 24, V: 8414',  'R: 25, V: 9114',  'R: 26, V: 9842',
  'R: 27, V: 10598', 'R: 28, V: 11382', 'R: 29, V: 12194',
  'R: 30, V: 13034', 'R: 31, V: 13902', 'R: 32, V: 14798',
  'R: 33, V: 15722', 'R: 34, V: 16674', 'R: 35, V: 17654',
  'R: 36, V: 18662', 'R: 37, V: NaN',   'R: 38, V: NaN',
  'R: 39, V: NaN'
]
3822
63

 [
  'R: 0, V: NaN',       'R: 1, V: NaN',
  'R: 2, V: NaN',       'R: 3, V: NaN',
  'R: 4, V: NaN',       'R: 5, V: NaN',
  'R: 6, V: NaN',       'R: 7, V: NaN',
  'R: 8, V: NaN',       'R: 9, V: NaN',
  'R: 10, V: NaN',      'R: 11, V: -2787',
  'R: 12, V: -3591',    'R: 13, V: -4539',
  'R: 14, V: -5643',    'R: 15, V: -6915',
  'R: 16, V: -133886',  'R: 17, V: -170201',
  'R: 18, V: -213476',  'R: 19, V: -264551',
  'R: 20, V: -324314',  'R: 21, V: -393701',
  'R: 22, V: -473696',  'R: 23, V: -565331',
  'R: 24, V: -669686',  'R: 25, V: -787889',
  'R: 26, V: -921116',  'R: 27, V: -1070591',
  'R: 28, V: -1237586', 'R: 29, V: -1423421',
  'R: 30, V: -1629464', 'R: 31, V: -1857131',
  'R: 32, V: -2107886', 'R: 33, V: -2383241',
  'R: 34, V: -2684756', 'R: 35, V: -3014039',
  'R: 36, V: -3372746', 'R: 37, V: NaN',
  'R: 38, V: NaN',      'R: 39, V: NaN'
]
undefined

关键点:

  1. 输入值一进来就会进行字符串化,这也就解释了为什么数组 [1,2] 最后得到的结果是 1

  2. 进制数的处理,为 0 时 radix = 10, 非 [2, 36] 区间的数视为非法进制

  3. 输入的内容(string) 如何转成对应进制的数,即 parse(Z, radix) 函数

字符串转数字函数:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
/**
 * Converts a string of digits written in base `radix` to a number.
 *
 * Letters are treated as digits for every base above 10 (a = 10 ... z = 35),
 * not only for base 16. The value is accumulated with ordinary
 * floating-point arithmetic, so results above Number.MAX_SAFE_INTEGER are
 * approximate.
 *
 * @param {string} Z - Digits 0-9 / a-z (either case), without sign or prefix.
 * @param {number} [radix=10] - Base of the digits.
 * @returns {number} The value of `Z`; 0 for an empty string; NaN if any
 *   character is not a valid digit in `radix`.
 */
function parse(Z, radix = 10) {
  let mathInt = 0;

  for (let i = 0; i < Z.length; i++) {
    const code = Z.charCodeAt(i);

    // Char codes are compared directly: '0'..'9', 'a'..'z', 'A'..'Z'.
    let digit = -1;
    if (code >= 48 && code <= 57) {
      digit = code - 48;
    } else if (code >= 97 && code <= 122) {
      digit = code - 87;
    } else if (code >= 65 && code <= 90) {
      digit = code - 55;
    }

    if (digit < 0 || digit >= radix) return NaN;

    // Horner's scheme: shift what we have by one digit and add the next.
    mathInt = mathInt * radix + digit;
  }
  return mathInt;
}
const log = console.log;
log(parse('EEE', 16));
3822
undefined

❓❓❓ parseInt(0.0000005) = 5 ❓❓❓

测试:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
/**
 * Prints the result of calling parseInt on `res` with no radix argument.
 * The label interpolates `res`, so it shows the same string form of the
 * number that parseInt itself will see.
 *
 * @param {*} res - Value passed unchanged to parseInt.
 */
function test(res) {
  const parsed = parseInt(res);
  console.log(`parseInt(${res}) = ${parsed}`);
}

// Each sample is ten times smaller than the previous one.
const samples = [
  0.5,
  0.05,
  0.005,
  0.0005,
  0.00005,
  0.000005,
  0.0000005,
  0.00000005,
  0.000000005,
];
for (const sample of samples) {
  test(sample);
}
parseInt(0.5) = 0
parseInt(0.05) = 0
parseInt(0.005) = 0
parseInt(0.0005) = 0
parseInt(0.00005) = 0
parseInt(0.000005) = 0
parseInt(5e-7) = 5
parseInt(5e-8) = 5
parseInt(5e-9) = 5
undefined

😭😭😭😭😭😭😭😭😭😭

结果不用分析了吧,上面的结果很明显了:小于 1e-6 的小数(小数点后有 6 个及以上前导零)在 toString 的时候被转成了指数表示形式(例如 0.0000005 会变成 "5e-7"),而 parseInt 实现伪码一开始就会将传进去的数字转成字符串,所以最后在识别字符串的时候遇到了 e 就终止了,最终得到 5 的结果。

本着追根究底的原则,还是去看下 toString 里面是怎么处理的吧 🚆

ECMAScript® 2022 Language Specification - toString

/img/tmp/ecma-tostring.png

这里就要看下数字是如何转成字符串的: Number::toString ( x )