Амодуль:Unicode convert
Для документации этого модуля может быть создана страница Амодуль:Unicode convert/doc
-- Module table: the conversion functions defined below are attached to it and
-- it is returned at the end of the file.
local p = {}
-- NOTE: all these functions use frame solely for its args member.
-- Modules using them may therefore call them with a fake frame table
-- containing only args.
--- Render the UTF-8 encoding of a code point as space-separated byte values.
-- @param frame Table whose `args` member supplies the parameters (a real
--   frame or a plain table containing only `args`).
--   `args[1]`: the code point written in hexadecimal; a missing or
--   unparsable value is treated as 0.
--   `args.base`: '10' or 'dec' prints each byte in decimal; any other value
--   (or none) prints each byte as two-digit uppercase hexadecimal.
-- @return The byte values joined by single spaces, or '' when the value lies
--   outside the range 0..0x10FFFF.
p.getUTF8 = function (frame)
    local codepoint = tonumber(frame.args[1] or '0', 16) or 0
    -- Values outside 0..0x10FFFF are not code points. Answer with an empty
    -- string, as getUTF16 does, instead of passing them to mw.ustring.char.
    -- NOTE(review): mw.ustring.char is expected to raise "value out of range"
    -- for such input -- confirm against the Scribunto implementation in use.
    if codepoint < 0 or codepoint > 0x10FFFF then
        return ''
    end
    local ch = mw.ustring.char(codepoint)
    -- Collect every byte of the encoded character.
    local bytes = {mw.ustring.byte(ch, 1, -1)}
    local format = ({
        ['10'] = '%d',
        dec = '%d'
    })[frame.args['base']] or '%02X'
    -- Replace each numeric byte with its formatted text in place.
    for i = 1, #bytes do
        bytes[i] = format:format(bytes[i])
    end
    return table.concat(bytes, ' ')
end
--- Render the UTF-16 encoding of a code point as one or two code units.
-- @param frame Table whose `args` member supplies the parameters (a real
--   frame or a plain table containing only `args`).
--   `args[1]`: the code point written in hexadecimal; a missing or
--   unparsable value is treated as 0.
--   `args.base`: '10' or 'dec' prints each code unit in decimal; any other
--   value (or none) prints at-least-four-digit uppercase hexadecimal.
-- @return A single code unit for values up to 0xFFFF, a high and a low
--   surrogate separated by one space for values up to 0x10FFFF, or '' for
--   anything larger.
p.getUTF16 = function (frame)
    local codepoint = tonumber(frame.args[1] or '0', 16) or 0
    local format = ({ -- TODO reduce the number of options.
        ['10'] = '%d',
        dec = '%d'
    })[frame.args['base']] or '%04X'
    if codepoint <= 0xFFFF then -- NB this also returns lone surrogate characters
        return format:format(codepoint)
    elseif codepoint > 0x10FFFF then -- There are no codepoints above this
        return ''
    end
    -- Supplementary plane: split the 20-bit offset from 0x10000 into two
    -- 10-bit halves and add the surrogate bases.
    codepoint = codepoint - 0x10000
    -- Keep the library handle local so the module does not leak a global.
    local bit32 = require('bit32')
    return (format .. ' ' .. format):format(
        bit32.rshift(codepoint, 10) + 0xD800,
        bit32.band(codepoint, 0x3FF) + 0xDC00)
end
--- Decode a whitespace-separated list of UTF-8 byte values into code points.
-- @param frame Table whose `args` member supplies the parameters (a real
--   frame or a plain table containing only `args`).
--   `args[1]`: byte values separated by whitespace characters. Tokens that
--   do not parse as a number in the input base (including the empty tokens
--   produced by repeated, leading or trailing whitespace) are skipped. A
--   missing argument is treated as an empty list.
--   `args.basein`: 'dec' reads the byte values as decimal; anything else
--   reads them as hexadecimal.
--   `args.base`: 'dec' prints each code point in decimal; anything else
--   prints at-least-two-digit uppercase hexadecimal.
-- @return The decoded code points joined by single spaces ('' when there
--   are no bytes).
-- NOTE(review): byte values outside 0..255 and byte sequences that are not
-- valid UTF-8 are presumably rejected with an error by string.char /
-- mw.ustring.codepoint -- confirm if callers need a softer failure.
p.fromUTF8 = function(frame)
    local basein = frame.args['basein'] == 'dec' and 10 or 16
    local format = frame.args['base'] == 'dec' and '%d' or '%02X'
    local bytes = {}
    -- Default a missing argument to '' so the split yields no usable tokens
    -- instead of raising on a nil input.
    for byte in mw.text.gsplit(frame.args[1] or '', '%s') do
        local value = tonumber(byte, basein)
        -- tonumber returns nil for empty or unparsable tokens; skip those.
        if value then
            bytes[#bytes + 1] = value
        end
    end
    -- Reassemble the raw byte string, then read it back as code points.
    local chars = {mw.ustring.codepoint(string.char(unpack(bytes)), 1, -1)}
    for i = 1, #chars do
        chars[i] = format:format(chars[i])
    end
    return table.concat(chars, ' ')
end
-- Export the module table.
return p