Module:Hrkt-translit
Ang dokumentasyon ng modyul na ito ay maaaring likhain sa Module:Hrkt-translit/doc
local concat = table.concat
local insert = table.insert
local load_data = mw.loadData
local toNFD = mw.ustring.toNFD
local umatch = mw.ustring.match
local m_ja = require("Module:ja")
local kata_to_hira = m_ja.kata_to_hira
local normalize_kana = m_ja.normalize_kana
-- Cache for the shared data module "Module:Hrkt-translit/data"; filled on first
-- use by export.process_data / get_data.
local data_common
-- Placeholder control characters inserted into the output while it is being
-- built; export.tr replaces both with "'" just before returning.
local glottal = "\1"
local disambig = "\2"
-- Body of a pattern character class (always spliced between "[" and "]"):
-- the ASCII lowercase letters other than a, e, i, o, u, w and y.
local cons = "b-df-hj-np-tvxz"
-- Module table; returned at the end of the file.
local export = {}
--- Return the part of `text` that precedes its final character.
-- More precisely: the longest prefix that ends right before an occurrence of
-- the final character which is not itself preceded by that same character
-- (e.g. "kya" -> "ky", "shi" -> "sh"). If there is no such prefix (single
-- character, or the string consists of one repeated character) `text` is
-- returned unchanged.
-- @param text string to split
-- @return the initial part, or `text` itself when nothing can be split off
local function get_initial(text)
	local last = umatch(text, ".$")
	if not last then
		-- Empty string: there is no final character to split on. Without this
		-- guard the concatenation below would raise on a nil value.
		return text
	end
	if last:match("^%p$") then
		-- An ASCII punctuation character such as "%" or "^" would be read as
		-- pattern syntax inside the set; escape it so it is taken literally.
		last = "%" .. last
	end
	return umatch(text, "(.+)%f[" .. last .. "]") or text
end
--- Record the consonant initials found among the values of `data`.
-- Every value made up only of letters is reduced to its initial with
-- get_initial; that initial, and whatever `d_voicing` / `d_semivoicing` map it
-- to, is stored as a key of `initials` when it consists solely of characters
-- from `cons`. Each key of `data` is then marked in `checked` so that a later
-- call with another table skips keys already handled.
-- @param data table of values to scan; nil/false makes the call a no-op
-- @param d_voicing table indexed by initial
-- @param d_semivoicing table indexed by initial
-- @param initials set (initial -> true) that is filled in place
-- @param checked set (key -> true) of keys already processed, updated in place
local function handle_initials(data, d_voicing, d_semivoicing, initials, checked)
	if not data then
		return
	end

	local consonants_only = "^[" .. cons .. "]+$"

	-- Adds `candidate` to the set when it is a pure consonant string.
	local function record(candidate)
		if candidate and candidate:match(consonants_only) then
			initials[candidate] = true
		end
	end

	for key, rom in pairs(data) do
		if not checked[key] and umatch(rom, "^%a+$") then
			local onset = get_initial(rom)
			record(onset)
			local voiced, semivoiced = d_voicing[onset], d_semivoicing[onset]
			record(voiced)
			record(semivoiced)
		end
		checked[key] = true
	end
end
--- Compute `data.initials` for a data table and return the same table.
-- The set is built from `data.rom`; unless `common` is truthy, the shared data
-- module "Module:Hrkt-translit/data" is loaded as well, supplying the voicing
-- tables that `data` lacks and a second `rom` table to scan.
-- @param data data table; gains an `initials` field
-- @param common truthy when `data` needs no fallback to the shared module
-- @return `data`
function export.process_data(data, common)
	local initials, checked = {}, {}
	local voicing, semivoicing = data.tr_voicing, data.tr_semivoicing
	data.initials = initials

	if common then
		handle_initials(data.rom, voicing, semivoicing, initials, checked)
		return data
	end

	data_common = data_common or load_data("Module:Hrkt-translit/data")
	voicing = voicing or data_common.tr_voicing
	semivoicing = semivoicing or data_common.tr_semivoicing
	-- Scan the language table first so that its keys shadow the shared ones.
	handle_initials(data.rom, voicing, semivoicing, initials, checked)
	handle_initials(data_common.rom, voicing, semivoicing, initials, checked)
	return data
end
--- Build a lookup function over the transliteration data.
-- The returned function takes a chain of keys and follows them through nested
-- tables. When "Module:Hrkt-translit/data/<lang>" exists, the language table
-- and the shared table are walked in parallel and the language value wins
-- unless it is nil; otherwise only the shared table is consulted.
-- @param lang language code, or nil/false for the shared data only
-- @return function(...) returning the value found, or nil
local function get_data(lang)
	data_common = data_common or load_data("Module:Hrkt-translit/data")

	-- Follows the given keys down from `node`; yields nil as soon as a key has
	-- to be applied to something that is not a table.
	local function walk(node, ...)
		local depth = select("#", ...)
		for idx = 1, depth do
			if type(node) ~= "table" then
				return nil
			end
			node = node[(select(idx, ...))]
		end
		return node
	end

	local page = lang and ("Module:Hrkt-translit/data/" .. lang)
	if page and package.loaders[2](page) then
		local lang_table = load_data(page)
		return function(...)
			local from_lang, from_common = lang_table[...], data_common[...]
			for depth = 2, select("#", ...) do
				local key = select(depth, ...)
				if type(from_lang) ~= "table" then
					-- Language branch ran out: finish in the shared table.
					return walk(from_common, select(depth, ...))
				end
				from_lang = from_lang[key]
				if type(from_common) ~= "table" then
					-- Shared branch ran out: finish in the language table,
					-- which has already consumed the current key.
					return walk(from_lang, select(depth + 1, ...))
				end
				from_common = from_common[key]
			end
			if from_lang == nil then
				return from_common
			end
			return from_lang
		end
	end

	return function(...)
		return walk(data_common[...], select(2, ...))
	end
end
--- Return `result[i_last]` with its initial replaced according to a data table.
-- The initial (see get_initial) is looked up in the table `d(key)`; when the
-- table has no entry for it the text comes back unchanged. Outside historical
-- mode a piece tagged "historical w" gets a "w" prepended before the lookup.
-- @param i_last index of the piece to rewrite
-- @param result table of output pieces
-- @param result_sp table of per-piece tags, parallel to `result`
-- @param hist truthy in historical mode
-- @param d lookup function as returned by get_data
-- @param key name of the data table to use ("tr_voicing" / "tr_semivoicing")
-- @return the rewritten piece (a single string; gsub's match count is dropped)
local function do_voicing(i_last, result, result_sp, hist, d, key)
	local text = result[i_last]
	if not hist and result_sp[i_last] == "historical w" then
		text = "w" .. text
	end
	-- The initial is spliced into a pattern, so punctuation in it (e.g. when the
	-- previous piece is "(" or "%") must be escaped; unescaped it either raises
	-- a malformed-pattern error or matches the wrong thing.
	local initial = get_initial(text):gsub("%p", "%%%0")
	return (text:gsub("^" .. initial, d(key)))
end
--- Transliterate `text` using the data tables obtained from get_data(lang).
-- The text is normalised (normalize_kana, kata_to_hira, NFD) and converted one
-- UTF-8 character at a time. Output pieces are collected in `result`; the
-- parallel table `result_sp` holds an optional tag per piece. Tags tested or
-- set in this function: "stop", "voiced", "semivoiced", "coda", "gem",
-- "allow gem", "cap", "'" and keys of the "flag_hahe" table.
-- Three passes follow: per-character conversion, then gemination /
-- disambiguation / long vowels, then capitalisation.
-- @param text string to transliterate
-- @param lang language code passed to get_data; may be nil
-- @param sc not used in this function
-- @param options optional table; fields read here: hist, keep_dot, phonetic,
--   no_diacritics
-- @return the transliterated string
function export.tr(text, lang, sc, options)
-- Track (but still process) input containing a character from the kanji range.
if umatch(text, "[" .. mw.loadData("Module:ja/data/range").kanji .. "]") then
require("Module:debug").track("ja/invalid Hrkt")
end
options = options or {}
-- Index 0 holds an empty string so that result[getlast(...)] is always a
-- string, even when getlast finds nothing and returns 0.
local result = {[0] = ""}
local result_sp = {}
local d = get_data(lang)
-- Scan `result` backwards from i_start (default: the last piece) and return
-- the first index accepted by predicate_good, or 0 if none is found.
-- Pieces between a ">" piece and the matching "<" piece are skipped.
-- The scan stops early (returning 0) when predicate_bad accepts an index.
-- Defaults: good = non-empty piece whose tag is not "'"; bad = tag "stop".
local function getlast(i_start, predicate_good, predicate_bad)
local in_xml = false
for i = i_start or #result, 1, -1 do
if in_xml then
if result[i] == "<" then in_xml = false end
elseif result[i] == ">" then
in_xml = true
else
if (predicate_bad or function(index)
return result_sp[index] == "stop"
end)(i) then break end
if (predicate_good or function(index)
return result[index]:len() > 0 and result_sp[index] ~= "'"
end)(i) then return i end
end
end
return 0
end
-- normalize long vowels and iteration marks
text = toNFD(kata_to_hira(normalize_kana(text)))
-- Pass 1: convert character by character (a lead byte plus its UTF-8
-- continuation bytes \128-\191).
for c in text:gmatch(".[\128-\191]*") do
-- rc: output for this character ("rom_hist" first in historical mode, then
-- "rom", else the character itself); rc_sp: its tag from "rom_sp".
local rc = options.hist and d("rom_hist", c) or d("rom", c) or c
local rc_sp = d("rom_sp", c)
local i_last = getlast()
if options.keep_dot and c == "." then
rc = "."
elseif c:match("%a") then
-- ASCII letters in the input are tagged "stop".
rc_sp = "stop"
end
-- If the data defines a digraph for this character after the previous piece,
-- the digraph replaces the previous piece and this character emits nothing.
local repl_digraph = d("digraph", c, result[i_last])
if repl_digraph then
result[i_last], rc = repl_digraph, ""
result_sp[i_last], rc_sp = nil, nil
end
if not options.hist then -- は/へ: pieces whose tag is a key of "flag_hahe"
-- Previous piece is flagged and is followed by one of the listed characters
-- or by output containing "-", "~", a letter or the glottal placeholder:
-- replace the piece by its tag value and clear the tag.
if d("flag_hahe", result_sp[i_last]) and (umatch(c, "[-~%.゙゚]") or rc:match("[-~%a" .. glottal .. "]")) then
result[i_last] = result_sp[i_last]
result_sp[i_last] = nil
end
-- Current piece is flagged: use the tag value as output in phonetic mode,
-- when the nearest preceding significant piece is tagged "stop", or when the
-- previous piece contains "-", "~", a letter or the glottal placeholder.
if d("flag_hahe", rc_sp) and (options.phonetic or result_sp[getlast(nil, function(i)
return result[i]:len() > 0 and result_sp[i] ~= "'" or result_sp[i] == "stop"
end, function() return false end)] == "stop" or result[i_last]:match("[-~%a" .. glottal .. "]")) then
rc = rc_sp
rc_sp = nil
end
end
-- NOTE(review): "Ӡ" (U+04E0) in the set below looks like a mangled closing
-- quote, i.e. the counterpart of the opening quote tested in the elseif;
-- confirm against the upstream module.
if rc:match("%a") and umatch(result[i_last], "^[,%.?!:)Ӡ]$") then --space and punctuations
result[i_last] = result[i_last] .. " "
elseif umatch(rc, "^[(“]$") and result[i_last]:match("%a") then
rc = " " .. rc
end
-- A voicing / semivoicing mark rewrites the initial of the previous piece.
if rc_sp == "voiced" then -- voicing
result[i_last] = do_voicing(i_last, result, result_sp, options.hist, d, "tr_voicing")
elseif rc_sp == "semivoiced" then
result[i_last] = do_voicing(i_last, result, result_sp, options.hist, d, "tr_semivoicing")
end
-- Output ending in consonant letters is tagged "coda" (unless it is a "stop").
if rc:match("[" .. cons .. "]+" .. "$") and rc_sp ~= "stop" then
rc_sp = "coda"
end
local r_last = result[i_last]
-- r_lastlast: the last letter of the previous piece plus any trailing non-letters.
local r_lastlast = r_last:match"^.*(%a%A*)$" --vowel clusters or stop consonants
if r_lastlast and r_lastlast:match("[aiueo]") then
-- Merge output that starts with "-" (or, in historical mode, with y/w)
-- into the previous piece, adjusting that piece's final vowel.
if rc:match("^%-[yw]") and r_last:match("^[" .. cons .. "yw]") then
local rc_first = rc:sub(2, 2)
r_last = #r_last > 1 and r_last:sub(1, -2) or r_last
if not (rc_first == "y" and d("flag_postalveolarconsonant", r_last)) then
r_last = r_last .. rc_first
end
result[i_last] = r_last
rc = rc:sub(3)
elseif options.hist and r_last:match("^[" .. cons .. "]") and (
r_lastlast == "i" and rc:sub(1, 1) == "y" or
r_lastlast == "u" and rc:sub(1, 1) == "w"
) then
local rc_first = rc:sub(1, 1)
r_last = r_last:sub(1, -2)
if not (rc_first == "y" and d("flag_postalveolarconsonant", r_last)) then
r_last = r_last .. rc_first
end
result[i_last] = r_last
rc = rc:sub(2)
elseif rc:match"^%-[yw]?[aiueo]$" then
rc = rc:sub(2)
if r_lastlast == rc then
-- Same vowel: double it on the previous piece and emit nothing here.
result[i_last] = r_last .. r_lastlast
rc = ""
elseif d("flag_specialconsonant", r_last) then
result[i_last] = r_last:sub(1, -2)
elseif r_lastlast == "i" then
result[i_last] = r_last:sub(1, -2) .. "y"
elseif r_lastlast:match("[ou]") and rc ~= "u" then
result[i_last] = r_last:sub(1, -2) .. "w"
elseif #r_last > 1 then
result[i_last] = r_last:sub(1, -2)
end
end
end
insert(result, rc)
result_sp[#result] = rc_sp
end
if not options.hist then -- isolated は/へ
-- A flagged final piece with nothing significant before it is replaced by
-- its tag value.
local i_last = getlast()
if d("flag_hahe", result_sp[i_last]) and getlast(i_last - 1) == 0 then
result[i_last] = result_sp[i_last]
end
end
-- Pass 2: gemination, disambiguation marks and long vowels.
local has_gem = false
for i, v in ipairs(result) do
--gemination
if has_gem then
-- A "gem" piece was seen earlier: resolve it at the first piece that starts
-- with (optional glottal placeholders and) consonant/y/w letters.
local apos, consonant, remainder = v:match("^(" .. glottal .. "*)([" .. cons .. "yw]+)(.*)")
if consonant then
local init, c_gem = apos .. consonant
-- Look up "tr_gem", retrying with a trailing y/w removed each time; the
-- result of the last lookup is kept.
while true do
c_gem = d("tr_gem", init)
if #init == 1 or not init:match("[yw]$") then
break
end
init = init:sub(1, -2)
end
-- Fall back to the first character of the (shortened) initial.
c_gem = c_gem or init:sub(1, 1)
-- The glottal placeholders are dropped from this piece ...
v = consonant .. remainder
local i_gem = getlast(i)
-- ... every preceding "gem" piece is overwritten with c_gem ("allow gem"
-- pieces are stepped over), and the placeholders are re-attached in front
-- of the piece found by getlast(i_gem + 1) once another tag is reached.
while true do
i_gem = getlast(i_gem - 1)
if result_sp[i_gem] == "gem" then
result[i_gem] = c_gem
elseif result_sp[i_gem] ~= "allow gem" then
i_gem = getlast(i_gem + 1)
result[i_gem] = apos .. result[i_gem]
break
end
end
has_gem = false
end
elseif result_sp[i] == "gem" then
has_gem = true
end
-- FIXME: ng/nw should be determined automatically by a disambiguation model.
-- Pieces starting with a vowel, y/w or ng/nw may need the disambiguation
-- placeholder in front of them, depending on the preceding piece.
local v_first = v:match("^[aiueoyw]") or v:match("^n[gw]")
if v_first then
local i_last
if v_first == "y" or v_first == "w" or v_first == "ng" or v_first == "nw" then
-- Here "." and "gem" pieces are skipped too, and "stop" does not end the scan.
i_last = getlast(i - 1, function(index)
local res, res_sp = result[index], result_sp[index]
return res ~= "" and res ~= "." and res_sp ~= "'" and res_sp ~= "gem"
end, function() end)
else
i_last = getlast(i - 1, nil, function() end)
end
if v_first:sub(1, 1) == "n" then
if umatch(result[i_last], "%a") and not (v_first == "nw" and result[i_last]:match("n$")) then
v = disambig .. v
end
elseif result_sp[i_last] == "coda" then
-- After a "coda" piece the mark is added unless "tr_coda_apos" has an entry
-- for this combination (an entry equal to "hist" still adds it in historical mode).
local coda = d("tr_coda_apos", v_first, result[i_last])
if coda == nil or options.hist and coda == "hist" then
v = disambig .. v
end
end
end
--Diacritics (long vowels and others).
-- Vowel runs within the piece are replaced through the "tr_long" table.
v = v:gsub("[aiueo][aiueo%A]*", d("tr_long")) -- From small kana.
local i_last = getlast(i - 1)
local r_last = result[i_last]
-- From digraphs.
-- If the tail of the previous piece joined with this piece has a "tr_long"
-- entry, fold this piece into the previous one (skipped in hist, phonetic
-- and no_diacritics modes, and when the previous piece ends in two vowels).
if r_last and not (options.hist or options.phonetic or options.no_diacritics) then
local r_lastlast = r_last:match"^.*(%a%A*)$" --vowel clusters or stop consonants
if r_lastlast and d("tr_long", r_lastlast .. v) and not r_last:match("[aiueo][aiueo]$") then
result[i_last] = (r_last .. v):gsub("[aiueo][aiueo%A]*", d("tr_long"))
v = ""
end
end
result[i] = v
end
-- Pass 3: capitalisation. Each "cap" tag adds one pending capital; pending
-- capitals are consumed by the following characters whose uppercase form
-- differs from the character itself.
local num_cap = 0
for i, v in ipairs(result) do
--uppercase
if result_sp[i] == "cap" then
num_cap = num_cap + 1
end
if num_cap > 0 then
result[i] = v:gsub(".[\128-\191]*", function(c)
if num_cap <= 0 then return c end
local uc = c:uupper()
if c ~= uc then num_cap = num_cap - 1 end
return uc
end)
end
end
-- Both placeholder characters become apostrophes in the final string.
return (concat(result):gsub("[" .. glottal .. disambig .. "]", "'"))
end
return export