javascript wchar_t 宽字符(UTF-16 码元)转化为字节数组(每个码元拆成高、低两个字节,取值 0~255)

开发者一鸣技术 2022年11月6日

0 收藏 991 点赞 973 浏览 1132 个字

String.prototype.charCodeAt

String.fromCharCode()

/**
 * Converts the string into a flat array of bytes, two per UTF-16 code unit:
 * the high byte (code >> 8) followed by the low byte (code & 0xff).
 *
 * @returns {number[]} Byte values, twice as many as the string has code units.
 */
String.prototype.toUtfArray = function() {
  var units = this.split('');
  var bytes = [];
  for (var i = 0; i < units.length; i++) {
    var unit = units[i].charCodeAt(0);
    // High byte first, then low byte.
    bytes.push(unit >> 8);
    bytes.push(unit & 0xff);
  }
  return bytes;
};
/**
 * Builds a string from a flat array of bytes, reading them as consecutive
 * (high byte, low byte) pairs, each pair forming one UTF-16 code unit.
 *
 * If the input has an odd number of bytes, a 0 is prepended so the length
 * becomes even. The padding is applied to an internal copy; the caller's
 * array is never modified.
 *
 * @param {ArrayLike<number>} a - Byte values (assumed to be in 0..255;
 *   this is not validated).
 * @returns {string} The decoded string ('' for an empty input).
 */
String.fromUtfArray = function(a) {
  // Copy first so that padding never mutates the caller's array.
  var bytes = Array.prototype.slice.call(a);
  // An even number of bytes is required; otherwise pad a 0 at the front.
  if (bytes.length % 2 === 1) {
    bytes.unshift(0);
  }
  var chars = [];
  for (var i = 0; i < bytes.length; i += 2) {
    // Combine high and low byte into one 16-bit code unit.
    chars.push(String.fromCharCode((bytes[i] << 8) | bytes[i + 1]));
  }
  return chars.join('');
};

test:

// Round-trip demo: encode a string to a byte array, then decode it back.
var s1 = "你好a";
var a = s1.toUtfArray(); // [79, 96, 89, 125, 0, 97]
console.log(a.toString()); // prints "79,96,89,125,0,97"
var s2 = String.fromUtfArray(a);
console.log(s2); // prints "你好a", the same text as s1

* UTF-8 是变长编码:每个字符占 1~4 个字节

字符 UTF-8编码 Byte 1 Byte 2 Byte 3

A 01000001

Ö 11000011 10010110

中 11100100 10111000 10101101

———————————————————

Binary Hex Comments
0xxxxxxx 0x00..0x7F Only byte of a 1-byte character encoding
10xxxxxx 0x80..0xBF Continuation bytes (1-3 continuation bytes)
110xxxxx 0xC0..0xDF First byte of a 2-byte character encoding
1110xxxx 0xE0..0xEF First byte of a 3-byte character encoding
11110xxx 0xF0..0xF7 First byte of a 4-byte character encoding