Files
Odin/core/utf16.odin
T
2017-06-12 11:48:12 +01:00

59 lines
1.1 KiB
Odin

// Rune produced by the procedures in this file whenever their input cannot
// be encoded or decoded.
const REPLACEMENT_CHAR = '\uFFFD';
// Largest rune value that encode_surrogate_pair and encode accept.
const MAX_RUNE = '\U0010FFFF';
// Surrogate range bounds used by the procedures in this file:
//   [_surr1, _surr2) is the range accepted for the first unit of a pair,
//   [_surr2, _surr3) is the range accepted for the second unit of a pair,
//   [_surr1, _surr3) is the range tested by is_surrogate.
const _surr1 = 0xd800;
const _surr2 = 0xdc00;
const _surr3 = 0xe000;
// First rune value that is encoded as a surrogate pair rather than as a
// single 16-bit unit.
const _surr_self = 0x10000;
// is_surrogate reports whether `r` lies in the half-open surrogate range
// [_surr1, _surr3).
const is_surrogate = proc(r: rune) -> bool {
	// Anything below the start of the range cannot be a surrogate.
	if r < _surr1 {
		return false;
	}
	return r < _surr3;
}
// decode_surrogate_pair combines the surrogate pair (r1, r2) into the rune
// it encodes.
//
// `r1` must lie in [_surr1, _surr2) and `r2` in [_surr2, _surr3); if either
// is outside its range, REPLACEMENT_CHAR is returned instead.
const decode_surrogate_pair = proc(r1, r2: rune) -> rune {
	// Reject a first unit outside its range.
	if r1 < _surr1 || r1 >= _surr2 {
		return REPLACEMENT_CHAR;
	}
	// Reject a second unit outside its range.
	if r2 < _surr2 || r2 >= _surr3 {
		return REPLACEMENT_CHAR;
	}
	// The first unit carries the upper 10 bits, the second the lower 10 bits;
	// the combined 20-bit value is an offset from _surr_self.
	var upper = (r1 - _surr1) << 10;
	var lower = r2 - _surr2;
	return (upper | lower) + _surr_self;
}
// encode_surrogate_pair splits the rune `r` into the surrogate pair
// (r1, r2) that represents it.
//
// Only runes in [_surr_self, MAX_RUNE] can be represented as a pair; for any
// other value both results are REPLACEMENT_CHAR.
const encode_surrogate_pair = proc(r: rune) -> (r1, r2: rune) {
	if r >= _surr_self && r <= MAX_RUNE {
		// Offset from _surr_self: its upper 10 bits go into the first unit,
		// its lower 10 bits into the second.
		var offset = r - _surr_self;
		var first = _surr1 + ((offset >> 10) & 0x3ff);
		var second = _surr2 + (offset & 0x3ff);
		return first, second;
	}
	return REPLACEMENT_CHAR, REPLACEMENT_CHAR;
}
// encode writes the UTF-16 encoding of the runes in `s` into `d`.
//
// Each rune in [0, _surr1) or [_surr3, _surr_self) becomes one 16-bit unit,
// each rune in [_surr_self, MAX_RUNE] becomes a surrogate pair (two units),
// and every other rune (surrogate values, negative values, values above
// MAX_RUNE) becomes a single REPLACEMENT_CHAR unit.
//
// Output is bounded by len(d): encoding stops at the first rune whose units
// no longer fit, so `d` is never indexed out of range and a surrogate pair is
// never split. Units of `d` past the last one written are left untouched.
const encode = proc(d: []u16, s: []rune) {
	var max_n = len(d); // capacity of the destination, in 16-bit units
	var n = 0;          // number of units written so far
	for r in s {
		match r {
		case 0..<_surr1, _surr3..<_surr_self:
			// `return` (not `break`) is used to stop encoding, since the
			// procedure has nothing left to do after the loop.
			if n+1 > max_n {
				return;
			}
			d[n] = u16(r);
			n++;
		case _surr_self..MAX_RUNE:
			// A pair needs room for both halves; never emit half a pair.
			if n+2 > max_n {
				return;
			}
			var r1, r2 = encode_surrogate_pair(r);
			d[n] = u16(r1);
			d[n+1] = u16(r2);
			n += 2;
		case:
			if n+1 > max_n {
				return;
			}
			d[n] = u16(REPLACEMENT_CHAR);
			n++;
		}
	}
}