你在错误地进行字符遍历

有问题的相关代码

jsx

/**
 * Collects the distinct UTF-16 code units of a string as `\uXXXX`-style escapes.
 *
 * NOTE: this is the deliberately flawed example. Walking a string by index
 * with `charCodeAt` visits UTF-16 code units, so a character stored as a
 * surrogate pair is reported as two separate entries.
 *
 * @param {string} value - Text to inspect.
 * @returns {string[]} Escapes in first-seen order, without duplicates.
 */
function pickUnicodeFor(value) {
	const escapes = new Set();
	let index = 0;
	while (index < value.length) {
		const codeUnit = value.charCodeAt(index);
		index += 1;
		// A zero code unit is skipped; the Set itself discards repeated escapes.
		if (codeUnit) {
			escapes.add(`\\u${codeUnit.toString(16).toUpperCase()}`);
		}
	}
	return [...escapes];
}

// Demo: "𤋮" (U+242EE) is stored as two UTF-16 code units, so two entries are printed.
const unicodeList = pickUnicodeFor("𤋮");
console.log("unicodeList:", unicodeList);

你能看出以上代码有什么问题吗？输出结果为：

jsx

unicodeList: [ '\\uD850', '\\uDEEE' ]

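发现问题：charCodeAt 是按 UTF-16 码元（code unit）来读取字符串的。「𤋮」（U+242EE）位于基本多文种平面（BMP）之外，在 UTF-16 中由一对代理项（surrogate pair，即 2 个码元）表示（在 UTF-8 中占 4 个字节），所以按下标遍历会把这 1 个字拆成 2 个“字”。如果自己来写代理对的解析又太麻烦了，那怎么办呢？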

其实 JS 早就提供了方案，只是绝大部分人不知道而已，它就是：用 for...of 来迭代字符串。字符串的迭代器按码点（code point）逐个返回字符，代理对会被当作一个整体。

正确遍历字符的方式

jsx

/**
 * Collects the distinct Unicode code points of a string as `\u{XXXX}` escapes.
 *
 * `for...of` walks a string by code point, so a character stored as a
 * surrogate pair is visited once instead of being split into two halves.
 * The list is logged and also returned so callers can reuse the result.
 *
 * @param {string} value - Text to inspect.
 * @returns {string[]} Escapes in first-seen order, without duplicates.
 */
function pickUnicodeForOf(value) {
	const unicodeSet = new Set();
	for (const ch of value) {
		// `ch` holds one whole code point, so position 0 yields its full value.
		const codePoint = ch.codePointAt(0);
		unicodeSet.add(`\\u{${codePoint.toString(16).toUpperCase()}}`);
	}
	const unicodeList = [...unicodeSet];
	console.log(`unicodeList: ${unicodeList}`);
	return unicodeList;
}

// Demo: the same character is now reported as a single code point.
pickUnicodeForOf("𤋮");
// Output:
// unicodeList: \u{242EE}

总结

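遍历字符时，不要用 for 下标（index）配合 charCodeAt 的方式，而应采用 for...of 按码点（code point）遍历。注意：for...of 只保证不拆开代理对；由多个码点组合而成的字符（如带变体选择符或零宽连接符的 emoji）仍会被拆开，这种场景可以使用 Intl.Segmenter。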

你在错误的进行字符遍历 ​

有问题相关代码 ​

正确遍历字符的方式 ​

总结 ​

你在错误的进行字符遍历

有问题相关代码

正确遍历字符的方式

总结