মডিউল:cpx-pron
অবয়ব
এই মডিউলের জন্য মডিউল:cpx-pron/নথি-এ নথিপত্র তৈরি করা হয়ে থাকতে পারে
local export = {}
local m_string_utils = require("Module:string utilities")
local m_table = require("Module:table")
local m_data = require("Module:cpx-pron/data")
local sub = m_string_utils.sub
local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local toNFD = mw.ustring.toNFD
local toNFC = mw.ustring.toNFC
-- Single-character markers that may decorate a syllable in the input string.
local SPECIAL_MARKERS = {
    -- Leading marker tested in split_syllable; sets `no_assimilation`.
    ["NO_ASSIMILATION"] = "*",
    -- Trailing marker tested in split_syllable; sets `no_sandhi`.
    ["NO_SANDHI"] = "#",
    -- Searched for anywhere in the syllable by split_syllable.
    ["MANUAL_CHANGE"] = ">",
    -- Leading marker tested in get_syllable_markers; sets `capitalize`.
    ["CAPITALIZATION"] = "^",
    -- Trailing marker (one backslash) tested in get_syllable_markers; sets `space_after`.
    ["SPACE_AFTER"] = "\\",
}
-- Names of the available output format modes.
-- NOTE(review): not referenced in the visible part of this file; presumably
-- consumed by formatting code further down -- confirm.
local FORMAT_MODES = {
    ["BRIEF"] = "brief",
    ["COMPLETE"] = "complete",
    ["DEMO"] = "demo",
}
-- Supported dialect codes and the wikitext link shown for each.
-- split_dialect_codes rejects any code that is not a key of this table.
local dialects = {
    ["pt"] = "[[w:Putian dialect|Putian]]",
    ["nr"] = "[[w:Nanri Island|Nanri]]",
    ["jk"] = "[[w:zh:江口鎮 (莆田市)|Jiangkou]]",
    ["xy"] = "[[w:Xianyou dialect|Xianyou]]",
    ["ft"] = "[[w:zh:楓亭鎮|Fengting]]",
    ["yy"] = "[[w:zh:游洋鎮|Youyang]]",
}
-- Romanised initial -> IPA transcription, keyed by dialect code.
-- Every dialect starts from the same inventory; only the differences are
-- listed per dialect, so a change to the common inventory is made once.
local initials
do
    -- Inventory common to all six dialects.
    local shared = {
        ["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
        ["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
        ["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
        ["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
        [""] = "",
    }

    -- Returns a fresh copy of `shared` with `overrides` applied on top.
    -- A new table is built on every call so dialects never alias each other.
    local function derive(overrides)
        local result = {}
        for key, value in pairs(shared) do
            result[key] = value
        end
        for key, value in pairs(overrides) do
            result[key] = value
        end
        return result
    end

    initials = {
        pt = derive({}),
        jk = derive({}),
        nr = derive({}),
        -- xy, yy and ft additionally have "w"; yy also realises "s" as θ.
        xy = derive({ ["w"] = "β" }),
        yy = derive({ ["s"] = "θ", ["w"] = "β" }),
        ft = derive({ ["w"] = "β" }),
    }
end
-- Romanised final -> IPA transcription, keyed by dialect code
-- (pt, jk, nr, xy, yy, ft -- the same codes as in `dialects`).
-- Within each dialect the entries are grouped as they are laid out below:
-- open finals, finals in -ng, finals in -h, and (xy, yy, ft only) finals in -nn.
-- The inventories differ per dialect, so each one is spelled out in full.
local finals = {
pt = {
["a"] = "a", ["ae"] = "ɛ", ["e"] = "e", ["i"] = "i", ["o"] = "o",
["oe"] = "ø", ["or"] = "ɒ", ["u"] = "u", ["y"] = "y",
["ai"] = "ai", ["ao"] = "au", ["ia"] = "ia", ["ieo"] = "ieu", ["iu"] = "iu",
["ou"] = "ɔu", ["ua"] = "ua", ["uei"] = "uei", ["ui"] = "ui", ["yor"] = "yɒ",
["ang"] = "aŋ", ["orng"] = "ɒŋ", ["eng"] = "ɛŋ", ["oeng"] = "œŋ", ["ong"] = "ɔŋ",
["ing"] = "iŋ", ["ieng"] = "iɛŋ", ["ung"] = "uŋ", ["uang"] = "uaŋ", ["yng"] = "yŋ",
["yorng"] = "yɒŋ", ["ng"] = "ŋ̍",
["ah"] = "aʔ", ["orh"] = "ɒʔ", ["eh"] = "ɛʔ", ["oeh"] = "œʔ", ["oh"] = "ɔʔ",
["ih"] = "iʔ", ["iah"] = "iæʔ", ["uh"] = "uʔ", ["uah"] = "uaʔ", ["ieh"] = "iɛʔ",
["uoh"] = "uoʔ", ["yh"] = "yʔ", ["yorh"] = "yɒʔ",
},
jk = {
["a"] = "a", ["e"] = "e", ["ae"] = "ɛ", ["eo"] = "ø", ["oe"] = "œ",
["or"] = "ɒ", ["o"] = "o", ["i"] = "i", ["u"] = "u", ["y"] = "y",
["ie"] = "ie", ["iao"] = "iɐu", ["iu"] = "iu", ["ai"] = "ai", ["ao"] = "au",
["ou"] = "ou", ["uo"] = "uo", ["uai"] = "uɐi", ["ui"] = "ui", ["yoe"] = "yø",
["ang"] = "aŋ", ["eng"] = "ɛŋ", ["ing"] = "iŋ", ["ung"] = "uŋ", ["ieng"] = "iɛŋ",
["orng"] = "ɒŋ", ["oeng"] = "œŋ", ["ong"] = "ɔŋ", ["ng"] = "ŋ̍",
["ah"] = "aʔ", ["eh"] = "ɛʔ", ["ih"] = "eʔ", ["oh"] = "ɔʔ", ["orh"] = "ɒʔ",
["oeh"] = "œʔ"
},
nr = {
["a"] = "a", ["e"] = "e", ["oe"] = "ø", ["or"] = "ɒ", ["o"] = "o",
["i"] = "i", ["u"] = "u", ["y"] = "y", ["ia"] = "ia", ["ieo"] = "ieu",
["iu"] = "iu", ["ai"] = "ai", ["ao"] = "au", ["oo"] = "ɔ", ["ua"] = "ua",
["uei"] = "uei", ["ui"] = "ui",
["ang"] = "aŋ", ["eng"] = "e̞ŋ", ["ing"] = "iŋ", ["ong"] = "oŋ",
["orng"] = "ɒŋ", ["oeng"] = "œŋ", ["uerng"] = "uəŋ", ["yng"] = "yŋ", ["ng"] = "ŋ̍",
["ah"] = "aʔ", ["eh"] = "e̞ʔ", ["ih"] = "iʔ", ["oh"] = "oʔ", ["orh"] = "ɒʔ",
["oeh"] = "œʔ", ["uerh"] = "uəʔ", ["yh"] = "yʔ"
},
xy = {
["a"] = "a", ["ae"] = "ɛ", ["e"] = "e", ["i"] = "i", ["o"] = "o",
["oe"] = "ø", ["or"] = "ɒ", ["u"] = "u", ["y"] = "y",
["ai"] = "ai", ["ao"] = "au", ["ia"] = "ia", ["ieo"] = "ieu", ["iu"] = "iu",
["ou"] = "ɔu", ["ua"] = "ua", ["uei"] = "uei", ["ui"] = "ui", ["ya"] = "ya",
["ang"] = "aŋ", ["orng"] = "ɒŋ", ["eng"] = "ɛŋ",
["ing"] = "iŋ", ["ieng"] = "iɛŋ", ["yng"] = "yŋ",
["yoeng"] = "yøŋ", ["uong"] = "uoŋ", ["ng"] = "ŋ̍",
["ah"] = "aʔ", ["orh"] = "ɒʔ", ["eh"] = "ɛʔ",
["ih"] = "iʔ", ["ieh"] = "iɛʔ", ["uh"] = "uʔ",
["uoh"] = "uoʔ", ["yh"] = "yʔ", ["yoeh"] = "yøʔ",
["iah"] = "iaʔ", ["uah"] = "uaʔ", -- iah, uah occur only with the pronoun checked tone (S3)
["ann"] = "ã", ["inn"] = "ĩ", ["ynn"] = "ỹ", ["ornn"] = "ɒ̃", ["ainn"] = "ãĩ",
["aonn"] = "ãũ", ["iann"] = "ĩã", ["iunn"] = "ĩũ", ["uann"] = "ũã", ["uinn"] = "ũĩ",
["yann"] = "ỹã",
},
yy = {
["a"] = "a", ["e"] = "e", ["oe"] = "ø", ["or"] = "ɒ", ["o"] = "o",
["i"] = "i", ["u"] = "u", ["y"] = "y", ["ia"] = "ia", ["ieo"] = "iəu",
["iu"] = "iu", ["ai"] = "ai", ["ao"] = "au", ["ou"] = "ou", ["ua"] = "ua",
["uai"] = "uai", ["oi"] = "oi", ["ui"] = "ui", ["ya"] = "ya",
["ang"] = "aŋ", ["eng"] = "ɛŋ", ["ing"] = "iŋ", ["ong"] = "oŋ", ["ung"] = "uŋ",
["ieng"] = "iɛŋ", ["orng"] = "ɒŋ", ["oeng"] = "œŋ", ["uang"] = "uaŋ",
["yang"] = "yɐŋ", ["yng"] = "yŋ", ["ng"] = "ŋ̍",
["ah"] = "aʔ", ["eh"] = "ɛʔ", ["ih"] = "iʔ", ["oh"] = "oʔ", ["orh"] = "ɒʔ",
["oeh"] = "œʔ", ["ieh"] = "iɛʔ", ["uah"] = "uaʔ", ["yah"] = "yɐʔ", ["yh"] = "yʔ",
["ann"] = "ã", ["enn"] = "ẽ", ["oenn"] = "ø̃", ["ornn"] = "ɒ̃",
["iann"] = "ĩã", ["iunn"] = "ĩũ", ["uann"] = "ũã", ["uinn"] = "ũĩ", ["yann"] = "ỹã"
},
ft = {
["a"] = "a", ["e"] = "e", ["or"] = "ɒ", ["er"] = "ɤ", ["i"] = "i",
["u"] = "u", ["ia"] = "ia", ["ieo"] = "iəu", ["iu"] = "iu", ["ai"] = "ai",
["ao"] = "au", ["ou"] = "ou", ["ua"] = "ua", ["uei"] = "uei", ["ui"] = "ui",
["ang"] = "aŋ", ["eng"] = "ɛŋ", ["ing"] = "iŋ", ["orng"] = "ɒŋ",
["ieng"] = "ieŋ", ["uerng"] = "ɯəŋ", ["ng"] = "ŋ̍",
["ah"] = "aʔ", ["eh"] = "ɛʔ", ["ih"] = "iʔ", ["orh"] = "ɒʔ",
["ieh"] = "ieʔ", ["uerh"] = "ɯəʔ",
["ann"] = "ã", ["inn"] = "ĩ", ["ornn"] = "ɒ̃",
["iann"] = "ĩã", ["iunn"] = "ĩũ", ["uann"] = "ũã", ["uinn"] = "ũĩ"
}
}
-- Tone label -> tone contour (superscript tone letters), keyed by dialect code.
-- Tone categories:
--   1 yin level | 2 yang level | 3 rising | 4 yin departing | 5 yang departing
--   6A yin entering (A) | 6B yin entering (B) | 7A yang entering (A) | 7B yang entering (B)
-- S1: variant of tone 1 in non-final position
-- S3: "pronoun checked tone"; sounds like the rising tone (3) in both Putian
--     and Xianyou after tone sandhi
-- S7: defined for pt, jk and nr only; it appears in `sandhi_rules` as the
--     sandhi result of tone 6A for those dialects.
local tones = {
pt = {
["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "⁴⁵³", ["4"] = "⁴²",
["5"] = "¹¹", ["6A"] = "²¹", ["6B"] = "¹¹", ["7A"] = "⁴", ["7B"] = "¹³",
["S1"] = "⁵⁵", ["S3"] = "³²", ["S7"] = "⁴⁵"
},
jk = {
["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "⁴⁵³", ["4"] = "⁴²",
["5"] = "¹¹", ["6A"] = "²¹", ["6B"] = "¹¹", ["7A"] = "⁴", ["7B"] = "⁴⁵³",
["S1"] = "⁵⁵", ["S3"] = "³²", ["S7"] = "⁴⁵"
},
nr = {
["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "⁴⁵³", ["4"] = "⁴²",
["5"] = "¹¹", ["6A"] = "²¹", ["6B"] = "¹¹", ["7A"] = "⁴", ["7B"] = "¹³",
["S1"] = "⁵⁵", ["S3"] = "³²", ["S7"] = "⁴⁵"
},
xy = {
["1"] = "⁵⁴⁴", ["2"] = "²⁴", ["3"] = "³³²", ["4"] = "⁴²",
["5"] = "²¹", ["6A"] = "²", ["6B"] = "²¹", ["7A"] = "⁴", ["7B"] = "²⁴",
["S1"] = "⁵⁵", ["S3"] = "³²"
},
yy = {
["1"] = "⁵⁴⁴", ["2"] = "²⁴", ["3"] = "³³²", ["4"] = "⁴²",
["5"] = "²¹", ["6A"] = "²", ["6B"] = "²¹", ["7A"] = "⁴", ["7B"] = "²⁴",
["S1"] = "⁵⁵", ["S3"] = "³²"
},
ft = {
["1"] = "⁵⁴⁴", ["2"] = "²⁴", ["3"] = "³³²", ["4"] = "⁴²",
["5"] = "²¹", ["6A"] = "²", ["6B"] = "²¹", ["7A"] = "⁴", ["7B"] = "²⁴",
["S1"] = "⁵⁵", ["S3"] = "³²"
},
}
-- Spelling corrections for finals: maps an alternative spelling of a final to
-- a replacement spelling. `common` holds entries that are not tied to one
-- dialect; the remaining keys are dialect codes.
-- NOTE(review): the code that applies this table is not in the visible part of
-- the file; presumably `common` is applied together with the dialect's own
-- entries, and the replacements are meant to be keys of `finals` -- confirm.
local corrections = {
common = {
["au"] = "ao", ["ieu"] = "ieo", ["iau"] = "ieo"
},
pt = {
["iao"] = "ieo", ["yo"] = "yor", ["ue"] = "uei", ["uai"] = "uei",
["yoh"] = "yorh", ["yong"] = "yorng",
},
jk = {
["ia"] = "ie", ["ua"] = "uo", ["ue"] = "uai", ["uei"] = "uai",
["yo"] = "oe", ["yor"] = "oe", ["ye"] = "yoe",
},
nr = {
["iao"] = "ieo", ["ou"] = "oo", ["ue"] = "uei", ["yo"] = "ua",
["yor"] = "ua", ["ung"] = "ng", ["uang"] = "uerng", ["uah"] = "uerh",
},
xy = {
["iao"] = "ieo", ["ue"] = "uei", ["yeh"] = "yoeh",
["yeng"] = "yoeng", ["iang"] = "ieng", ["ung"] = "ng",
},
yy = {
["iao"] = "ieo", ["ue"] = "oi", -- or "uai"
["uei"] = "oi", -- or "uai"
},
ft = {
["o"] = "er", ["iao"] = "ieo", ["ue"] = "uei", ["ya"] = "ia",
["ung"] = "ng", ["uong"] = "uerng", ["erng"] = "uerng", ["erh"] = "uerh",
}
}
-- Strips a trailing nasalisation marker from a final.
-- Two spellings are recognised: a trailing superscript n and a trailing "nn".
-- Returns three values:
--   1. the final without the marker (unchanged when no marker is present),
--   2. the matched superscript-n text, or nil when it is absent,
--   3. the matched "nn" text, or nil when it is absent.
-- The superscript form takes precedence when deciding what to strip.
local function handle_nasalization(final)
    local old_marker = final:match("ⁿ$")
    local new_marker = final:match("nn$")

    local stem = final
    if old_marker then
        -- Parentheses keep only gsub's first result (the string).
        stem = (final:gsub("ⁿ$", ""))
    elseif new_marker then
        stem = (final:gsub("nn$", ""))
    end

    return stem, old_marker, new_marker
end
-- Tone sandhi table: sandhi_rules[dialect][tone][following_tone] is the tone
-- label that replaces `tone` when the next syllable carries `following_tone`.
-- Result labels are keys of `tones` (including the sandhi-only S1 and S7).
--
-- pt and jk share one matrix; xy, yy and ft share another; nr is the pt/jk
-- matrix with two rows replaced. Each dialect gets freshly built tables, so
-- no two dialects alias the same row.
local sandhi_rules
do
    -- Builds one row. The nine arguments are the results for a following
    -- syllable in tone 1, 2, 3, 4, 5, 6A, 6B, 7A and 7B, in that order.
    local function row(t1, t2, t3, t4, t5, t6a, t6b, t7a, t7b)
        return {
            ["1"] = t1, ["2"] = t2, ["3"] = t3, ["4"] = t4, ["5"] = t5,
            ["6A"] = t6a, ["6B"] = t6b, ["7A"] = t7a, ["7B"] = t7b,
        }
    end

    -- Matrix used as-is by pt and jk.
    local function putian_matrix()
        return {
            --            next:  1     2     3     4     5    6A   6B    7A    7B
            ["1"]  = row("5",  "5",  "5",  "2",  "2", "2", "2", "5",  "5"),
            ["2"]  = row("5",  "5",  "5",  "S1", "4", "4", "4", "5",  "5"),
            ["3"]  = row("5",  "2",  "5",  "5",  "2", "2", "2", "5",  "2"),
            ["4"]  = row("S1", "4",  "S1", "S1", "4", "4", "4", "S1", "4"),
            ["5"]  = row("5",  "5",  "5",  "S1", "4", "4", "4", "5",  "5"),
            ["6A"] = row("S7", "S7", "S7", "S7", "4", "4", "4", "S7", "S7"),
            ["6B"] = row("S1", "S1", "S1", "S1", "4", "4", "4", "S1", "S1"),
            ["7A"] = row("6A", "6A", "6A", "7A", "4", "4", "4", "6A", "6A"),
            ["7B"] = row("5",  "5",  "5",  "S1", "4", "4", "4", "5",  "5"),
            ["S3"] = row("7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A"),
        }
    end

    -- Matrix used as-is by xy, yy and ft.
    local function xianyou_matrix()
        return {
            --            next:  1     2     3     4     5     6A    6B    7A    7B
            ["1"]  = row("5",  "5",  "5",  "2",  "2",  "2",  "2",  "5",  "5"),
            ["2"]  = row("5",  "5",  "5",  "S1", "4",  "4",  "4",  "5",  "5"),
            ["3"]  = row("5",  "S1", "5",  "5",  "2",  "2",  "2",  "5",  "S1"),
            ["4"]  = row("S1", "S1", "S1", "S1", "4",  "4",  "4",  "S1", "S1"),
            ["5"]  = row("5",  "5",  "5",  "S1", "4",  "4",  "4",  "5",  "5"),
            ["6A"] = row("7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A"),
            ["6B"] = row("S1", "S1", "S1", "S1", "4",  "4",  "4",  "S1", "S1"),
            ["7A"] = row("6A", "6A", "6A", "7A", "7A", "7A", "7A", "6A", "6A"),
            ["7B"] = row("5",  "5",  "5",  "S1", "4",  "4",  "4",  "5",  "5"),
            ["S3"] = row("7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A", "7A"),
        }
    end

    -- nr differs from pt/jk only in the rows for tones 1 and 4.
    local nanri = putian_matrix()
    nanri["1"] = row("5",  "5",  "5",  "5",  "2", "2", "2", "5",  "5")
    nanri["4"] = row("S1", "S1", "S1", "S1", "4", "4", "4", "S1", "S1")

    sandhi_rules = {
        pt = putian_matrix(),
        jk = putian_matrix(),
        nr = nanri,
        xy = xianyou_matrix(),
        yy = xianyou_matrix(),
        ft = xianyou_matrix(),
    }
end
-- Initial assimilation table:
-- initial_assimilation_rules[dialect][final_type][initial] is the replacement
-- for `initial`. The `final_type` keys are the names returned by
-- get_final_type ("nasal_final", "nasalized_final", "glottal_final",
-- "other_final").
-- NOTE(review): the code that applies these rules is outside the visible part
-- of the file; presumably `final_type` describes the preceding syllable's
-- final and `initial` belongs to the following syllable -- confirm.
--
-- pt, jk and nr share one rule set; xy, yy and ft share another, which adds
-- `nasalized_final` and turns b/p into "w" after other finals. pt, jk and nr
-- deliberately have no `nasalized_final` entry, exactly as before. Each
-- dialect gets freshly built tables, so no two dialects alias each other.
local initial_assimilation_rules
do
    -- Identical for every dialect.
    local function nasal_final_rules()
        return {
            ["b"] = "m", ["p"] = "m", ["m"] = "m",
            ["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
            ["g"] = "ng", ["k"] = "ng", ["h"] = "ng", ["ng"] = "ng", [""] = "ng",
        }
    end

    -- Only present for xy, yy and ft.
    local function nasalized_final_rules()
        return {
            ["b"] = "m", ["m"] = "m", ["p"] = "m",
            ["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
            ["g"] = "", ["k"] = "", ["h"] = "",
            ["ng"] = "ng",
            [""] = "",
        }
    end

    -- `labial` is the outcome for b/p: "" for pt/jk/nr, "w" for xy/yy/ft.
    local function other_final_rules(labial)
        return {
            ["b"] = labial, ["p"] = labial,
            ["m"] = "m", ["n"] = "n", ["l"] = "l", ["ng"] = "ng",
            ["d"] = "l", ["t"] = "l", ["z"] = "l", ["c"] = "l", ["s"] = "l",
            ["g"] = "", ["k"] = "", ["h"] = "", [""] = "",
        }
    end

    local function putian_type()
        return {
            nasal_final = nasal_final_rules(),
            glottal_final = {}, -- remain unchanged
            other_final = other_final_rules(""),
        }
    end

    local function xianyou_type()
        return {
            nasal_final = nasal_final_rules(),
            nasalized_final = nasalized_final_rules(),
            glottal_final = {}, -- remain unchanged
            other_final = other_final_rules("w"),
        }
    end

    initial_assimilation_rules = {
        pt = putian_type(),
        jk = putian_type(),
        nr = putian_type(),
        xy = xianyou_type(),
        yy = xianyou_type(),
        ft = xianyou_type(),
    }
end
-- Initial spelling used in the input romanisation -> spelling used in BUC
-- output (looked up in convert_to_buc_syllable). Only "z" (-> "c") and
-- "c" (-> "ch") change; all other initials map to themselves.
local buc_initials = {
    [""] = "",
    b = "b", p = "p", m = "m",
    d = "d", t = "t", n = "n", l = "l",
    z = "c", c = "ch", s = "s",
    g = "g", k = "k", ng = "ng", h = "h",
}
-- Final spelling used in the input romanisation -> list of candidate BUC finals.
-- Each candidate is a pair {buc_final, tone_position}: convert_to_buc_syllable
-- reads them as final_info[1] and final_info[2], and combine_buc_syllable
-- appends the tone diacritic to the tone_position-th character of buc_final
-- (1-based, counted over the characters yielded by mw.ustring.gmatch(final, ".")).
-- When a final has several candidates, convert_to_buc_syllable filters them by
-- tone and, if needed, picks one by matching against the readings in m_data.buc.
local buc_finals = {
["a"] = {{"a", 1}, {"aⁿ", 1}, {"ah", 1}},
["ae"] = {{"e", 1}},
["ah"] = {{"ah", 1}},
["ai"] = {{"ai", 1}},
["ang"] = {{"ang", 1}},
["ao"] = {{"au", 1}},
["e"] = {{"a̤", 1}, {"a̤ⁿ", 1}, {"a̤h", 1}},
["eh"] = {{"eh", 1}},
["eng"] = {{"eng", 1}},
["i"] = {{"i", 1}, {"ih", 1}},
["ia"] = {{"ia", 2}, {"iaⁿ", 2}, {"iah", 2}},
["iah"] = {{"iah", 2}},
["ieh"] = {{"iah", 2}},
["ieng"] = {{"iang", 2}},
["ieo"] = {{"a̤u", 2}, {"a̤uⁿ", 2}, {"a̤uh", 2}}, -- on `u`
["ih"] = {{"ih", 1}},
["ing"] = {{"ing", 1}},
["iu"] = {{"iu", 2}},
["ng"] = {{"ng", 1}}, -- actually in the middle of `n` and `g`
["o"] = {{"eo", 2}, {"eoh", 2}},
["oe"] = {{"e̤", 1}, {"e̤ⁿ", 1}},
["oeh"] = {{"e̤h", 1}},
["oeng"] = {{"e̤ng", 1}},
["oh"] = {{"eoh", 2}},
["ong"] = {{"eong", 2}},
["or"] = {{"o̤", 1}, {"o̤ⁿ", 1}, {"o̤h", 1}},
["orh"] = {{"o̤h", 1}},
["orng"] = {{"o̤ng", 1}},
["ou"] = {{"o", 1}},
["u"] = {{"u", 1}},
["ua"] = {{"ua", 2}, {"uaⁿ", 2}, {"uah", 2}},
["uah"] = {{"uah", 2}},
["uang"] = {{"uang", 2}},
["uei"] = {{"oi", 1}, {"uai", 2}, {"oiⁿ", 1}, {"oih", 1}}, -- on `o`
["uh"] = {{"uh", 1}},
["ui"] = {{"ui", 1}}, -- on `u`
["ung"] = {{"ng", 1}}, -- actually in the middle of `n` and `g`
["y"] = {{"ṳ", 1}},
["yh"] = {{"ṳh", 1}},
["yng"] = {{"ṳng", 1}},
["yor"] = {{"io̤", 2}, {"io̤ⁿ", 2}, {"io̤h", 2}},
["yorh"] = {{"io̤h", 2}},
["yorng"] = {{"io̤ng", 2}}
}
-- Tone label -> combining diacritic that combine_buc_syllable appends to the
-- tone-bearing character of a BUC final. An empty string means no diacritic.
-- Each non-empty value is a single combining mark, so it is easy to lose or
-- alter in an editor; the code points are spelled out in the comments.
-- Tones S1, S3 and S7 have no entry here (convert_to_buc_syllable maps S3 to
-- "3" before the lookup).
local buc_tones = {
["1"] = "", -- yin level: no mark
["2"] = "́", -- yang level: U+0301
["3"] = "̂", -- rising: U+0302
["4"] = "̍", -- yin departing: U+030D
["5"] = "̄", -- yang departing: U+0304
["6A"] = "", -- yin entering (A): final -h, no mark
["6B"] = "̄", -- yin entering (B)
["7A"] = "̍", -- yang entering (A): final -h plus U+030D
["7B"] = "̍", -- yang entering (B): final -h plus U+030D
}
-- Splits a comma-separated list of dialect codes and validates each one.
-- @param code_string  string such as "pt,xy"; whitespace around a code is
--                     ignored, so "pt, xy" is accepted as well.
-- @return sequence of dialect codes in input order.
-- Raises an error for any code that is not a key of `dialects`.
local function split_dialect_codes(code_string)
    local codes = {}
    for raw_code in code_string:gmatch("[^,]+") do
        -- Trim surrounding whitespace; previously "pt, xy" was rejected
        -- because the second code was read as " xy".
        local code = raw_code:match("^%s*(.-)%s*$")
        if not dialects[code] then
            error("Unsupported dialect: " .. code)
        end
        codes[#codes + 1] = code
    end
    return codes
end
-- Peels the BUC-related decorations off a syllable string.
-- Processing order (each step works on the result of the previous one):
--   1. a leading SPECIAL_MARKERS.CAPITALIZATION character,
--   2. a trailing SPECIAL_MARKERS.SPACE_AFTER character,
--   3. a trailing ",",
--   4. the first "{...}" group anywhere in the string (manual BUC form).
-- @param syllable  raw syllable string
-- @return markers  table with `capitalize`, `space_after`, `comma_after`
--                  (booleans) and `manual_buc` (text inside the braces, or nil)
-- @return the syllable with all recognised decorations removed
local function get_syllable_markers(syllable)
    local capitalize = syllable:sub(1, 1) == SPECIAL_MARKERS.CAPITALIZATION
    if capitalize then
        syllable = syllable:sub(2)
    end

    local space_after = syllable:sub(-1) == SPECIAL_MARKERS.SPACE_AFTER
    if space_after then
        syllable = syllable:sub(1, -2)
    end

    local comma_after = syllable:sub(-1) == ","
    if comma_after then
        syllable = syllable:sub(1, -2)
    end

    -- Manual BUC: cut the braces and their content out of the syllable.
    local manual_buc = nil
    local brace_open, brace_close = syllable:find("{[^}]+}")
    if brace_open then
        manual_buc = syllable:sub(brace_open + 1, brace_close - 1)
        syllable = syllable:sub(1, brace_open - 1) .. syllable:sub(brace_close + 1)
    end

    local markers = {
        capitalize = capitalize,
        space_after = space_after,
        comma_after = comma_after,
        manual_buc = manual_buc,
    }
    return markers, syllable
end
-- Splits a toneless romanised syllable into initial and final.
-- @param options  table; `options.form` is the syllable spelling (required)
-- @return initial, final (both strings; initial may be "")
-- Rules:
--   * "ng" on its own is a syllabic final with an empty initial;
--   * a longer form starting with "ng" has the initial "ng";
--   * otherwise the initial is one consonant letter optionally followed by
--     "h", or empty when the form starts with anything else.
-- Raises an error when `form` is missing or when nothing is left for the final.
local function split_initial_final(options)
    local form = options and options.form
    if not form then
        error("split_initial_final: form is required")
    end

    local initial
    if form == "ng" then
        initial = ""
    elseif form:sub(1, 2) == "ng" and #form > 2 then
        initial = "ng"
    else
        initial = form:match("^[bpmnltdzcsghkw]h?") or ""
    end

    -- Whatever follows the initial is the final.
    local final = form:sub(#initial + 1)
    if final == "" then
        error("Invalid form: " .. form .. " (unable to extract final)")
    end

    return initial, final
end
-- Phonological rule application functions
-- Classifies a final for the initial-assimilation rules.
-- @param options  table with:
--          final    (string, required) romanised final
--          initial  (string, optional) initial of the same syllable; only
--                   consulted for the xy rule below, nil is treated as ""
--          dialect  (string, optional) dialect code
-- @return one of "nasal_final", "glottal_final", "nasalized_final",
--         "other_final" (the keys used in `initial_assimilation_rules`)
-- Raises an error when `options` is not a table or `final` is missing.
--
-- Classification, first match wins:
--   1. final ends in "ng"  -> "nasal_final"
--   2. final ends in "h"   -> "glottal_final"
--   3. final ends in "nn"  -> "nasalized_final"
--   4. dialect is "xy" and the initial contains m or n (m, n, ng)
--                          -> "nasalized_final" (and the case is tracked)
--   5. anything else       -> "other_final"
-- The previous version evaluated rule 4 twice, each time through a recursive
-- call, and did so for every dialect; by the time rule 4 is reached that
-- recursive result is "other_final" exactly when the final does not end in
-- "nn", so it is tested directly here.
local function get_final_type(options)
    if not options or type(options) ~= "table" then
        error("get_final_type: options must be a table")
    end
    local final = options.final
    if not final then
        error("get_final_type: final cannot be nil")
    end

    if sub(final, -2) == "ng" then
        return "nasal_final"
    end
    if sub(final, -1) == "h" then
        return "glottal_final"
    end
    if sub(final, -2) == "nn" then
        return "nasalized_final"
    end

    -- xy only: an otherwise plain final counts as nasalized after a nasal
    -- initial. The dialect is checked first so other dialects skip the match.
    if options.dialect == "xy" and match(options.initial or "", "[mn]g?") then
        require("Module:debug").track('cpx-pron/xy-nasal-initial/default-rule')
        return "nasalized_final"
    end

    return "other_final"
end
-- Records a BUC-conversion problem through Module:debug's `track`, using the
-- tracking key "cpx-pron/<reason>".
-- @param reason  string appended to the "cpx-pron/" prefix
local function track_buc_issue(reason)
    local debug_module = require("Module:debug")
    debug_module.track('cpx-pron/' .. reason)
end
-- Assembles one BUC syllable from its parts and places the tone diacritic.
-- @param options  table with:
--          initial        BUC initial (string)
--          final          BUC final (string)
--          tone           tone label; must be a key of `buc_tones`
--          tone_position  1-based index, counted over the characters yielded
--                         by mw.ustring.gmatch(final, "."), of the character
--                         that receives the diacritic
-- @return the NFC-normalised syllable (initial .. final with the diacritic)
-- Raises "Invalid tone: ..." for an unknown or missing tone and
-- "Invalid tone position: ..." when the position is not a number within
-- 1..number of characters in `final`.
local function combine_buc_syllable(options)
    local initial = options.initial
    local final = options.final
    local tone = options.tone
    local tone_position = options.tone_position

    local tone_mark = buc_tones[tone]
    if not tone_mark then
        -- tostring: a nil tone must produce this message, not a concat error.
        error("Invalid tone: " .. tostring(tone))
    end

    -- Split the final into individual characters.
    local chars = {}
    for char in mw.ustring.gmatch(final, ".") do
        chars[#chars + 1] = char
    end

    -- Reject positions that do not address an existing character; 0, negative
    -- and non-numeric values previously failed with an unrelated error.
    if type(tone_position) ~= "number"
        or tone_position < 1
        or tone_position > #chars
        or chars[tone_position] == nil then
        error("Invalid tone position: " .. tostring(tone_position))
    end

    -- The diacritic goes directly after the tone-bearing character; NFC then
    -- composes/reorders the combining marks.
    chars[tone_position] = chars[tone_position] .. tone_mark
    return mw.ustring.toNFC(initial .. table.concat(chars))
end
-- Returns the entry stored for `char` in m_data.buc, or nil when there is no
-- (truthy) entry. convert_to_buc_syllable iterates the result with ipairs
-- and compares each element with a candidate BUC syllable.
local function lookup_char_readings(char)
    local readings = m_data.buc[char]
    if readings then
        return readings
    end
    return nil
end
-- Converts a single PSP syllable to its BUC spelling.
-- @param options  table with:
--          syllable_info  table; the fields read here are `manual_buc`,
--                         `original_tone`, `original_final`, `original_initial`
--          char           the Han character for this syllable, or nil when
--                         characters and syllables could not be paired up
-- @return the BUC syllable, or nil when it cannot be determined uniquely
--         (every nil return is preceded by a track_buc_issue call).
--
-- Steps:
--   1. A manually supplied BUC form is validated and returned as-is.
--   2. Tone S3 is looked up as tone 3 with any trailing "h" of the final removed.
--   3. The candidate BUC finals are filtered by tone (see the loop below) and
--      turned into full syllables.
--   4. A single surviving candidate is returned directly; otherwise the
--      candidates are matched against the character's readings in m_data.buc
--      and the result must be unique.
local function convert_to_buc_syllable(options)
    local syllable_info = options.syllable_info
    local char = options.char

    -- If BUC is manually specified, first verify.
    -- NOTE(review): validate_manual_buc is not defined in the part of the
    -- file above this function; unless it is a global or is declared before
    -- this point elsewhere, this call fails at run time -- confirm.
    -- (generate_buc handles manual forms itself before calling this function.)
    if syllable_info.manual_buc then
        local is_valid = validate_manual_buc(syllable_info.manual_buc)
        if not is_valid then
            track_buc_issue("manual form incorrect")
            return nil
        end
        return syllable_info.manual_buc
    end

    local lookup_tone = syllable_info.original_tone
    local lookup_final = syllable_info.original_final
    -- Special handling for S3 tone: treat as tone 3 on the final without "h".
    if lookup_tone == "S3" then
        lookup_tone = "3"
        if lookup_final:sub(-1) == "h" then
            lookup_final = lookup_final:sub(1, -2)
        end
    end

    -- Get possible BUC finals
    local possible_finals = buc_finals[lookup_final]
    if not possible_finals then
        track_buc_issue("no final found")
        return nil
    end

    -- Get BUC initial
    local initial = buc_initials[syllable_info.original_initial]
    if not initial then
        track_buc_issue("no initial found")
        return nil
    end

    -- Filter the candidate finals and build a full syllable for each survivor.
    -- Special check for BUC tone 7B, which merged into tone 2: a BUC final in
    -- "-h" whose PSP final has no "h" is only possible as tone 7B, and such
    -- 7B forms are marked with a trailing "*".
    local psp_has_h = syllable_info.original_final:match("h$")
    local candidates = {}
    for _, final_info in ipairs(possible_finals) do
        local final, tone_position = final_info[1], final_info[2]
        local buc_has_h = final:match("h$")
        local should_keep = true
        local use_tone = lookup_tone

        if lookup_tone == "7B" then
            if buc_has_h then
                final = final .. "*"
            else
                -- Tone 7B requires a final in "-h".
                should_keep = false
            end
        end

        if buc_has_h and not psp_has_h then
            if lookup_tone == "2" then
                -- PSP tone 2 without "h" corresponds to BUC 7B here.
                use_tone = "7B"
                final = final .. "*"
            elseif lookup_tone ~= "7B" then
                should_keep = false
            end
        end

        if should_keep then
            table.insert(candidates, combine_buc_syllable({
                initial = initial,
                final = final,
                tone = use_tone,
                tone_position = tone_position
            }))
        end
    end

    if #candidates == 1 then
        return candidates[1]
    end

    -- No need to look up Hanzi-BUC table if hanzi's and PSP's counts don't match
    if not char then
        if #possible_finals > 1 then
            track_buc_issue("contraction and multiple final found")
            return nil
        end
        -- NOTE(review): this point is only reached with a single possible
        -- final when the filter above rejected it (tone 7B on a final without
        -- "-h"); the unfiltered form is built anyway, as before -- confirm
        -- that this is intended.
        return combine_buc_syllable({
            initial = initial,
            final = possible_finals[1][1],
            tone = lookup_tone,
            tone_position = possible_finals[1][2]
        })
    end

    local char_readings = lookup_char_readings(char)
    if not char_readings then
        track_buc_issue("cannot look up table")
        return nil
    end

    -- Keep every (candidate, reading) pair that agrees; a reading listed
    -- twice therefore counts twice, as before.
    local matches = {}
    for _, candidate in ipairs(candidates) do
        for _, reading in ipairs(char_readings) do
            if candidate == reading then
                table.insert(matches, candidate)
            end
        end
    end

    if #matches == 0 then
        track_buc_issue("no matching reading found")
        return nil
    elseif #matches > 1 then
        track_buc_issue("multiple matching readings found")
        return nil
    end

    -- temp: track PSP tone 2 syllables that resolved to a 7B ("h*") form
    if syllable_info.original_tone == "2" and matches[1]:match("h%*$") then
        require("Module:debug").track('cpx-pron/2-to-7B')
    end
    return matches[1]
end
-- Builds the BUC transcription of a whole word.
-- @param options  table with:
--          syllable_infos  sequence of per-syllable tables (required)
--          dialect         dialect code; anything other than "pt" yields nil
--          word            passed through to convert_to_buc_syllable
-- @return the BUC string, or nil when the dialect is not "pt" or any syllable
--         cannot be converted.
-- Raises an error when `syllable_infos` is missing.
--
-- The current page title (with newlines, tabs, spaces and commas removed) is
-- paired character-by-character with the syllables, but only when both have
-- the same length; otherwise no character is passed to the converter.
-- Syllables are joined with ", " after a syllable flagged `comma_after`,
-- with " " after one flagged `space_after`, and with "-" otherwise.
local function generate_buc(options)
    local syllable_infos = options.syllable_infos
    if not syllable_infos then
        error("Missing required syllable_infos in generate_buc")
    end
    if options.dialect ~= "pt" then
        return nil
    end

    local title_text = mw.title.getCurrentTitle().text
    -- Only the first result of gsub (the string) is kept.
    local title_chars = mw.ustring.gsub(title_text, "[\n\r\t ,]", "")
    local use_char_table = (#syllable_infos == mw.ustring.len(title_chars))

    local buc_syllables = {}
    for index, info in ipairs(syllable_infos) do
        local text
        if info.manual_buc then
            -- Manual forms are used verbatim (no capitalisation applied).
            text = info.manual_buc
        else
            text = convert_to_buc_syllable({
                syllable_info = info,
                char = use_char_table and mw.ustring.sub(title_chars, index, index) or nil,
                word = options.word
            })
            -- If any syllable cannot be uniquely identified, give up.
            if not text then
                return nil
            end
            if info.capitalize then
                -- Decompose first so only the base letter is upper-cased.
                local decomposed = mw.ustring.toNFD(text)
                local head = mw.ustring.upper(mw.ustring.sub(decomposed, 1, 1))
                text = mw.ustring.toNFC(head .. mw.ustring.sub(decomposed, 2))
            end
        end
        table.insert(buc_syllables, text)
    end

    -- Join the syllables, choosing the separator from the preceding syllable.
    local total = #buc_syllables
    local pieces = {}
    for index, text in ipairs(buc_syllables) do
        pieces[#pieces + 1] = text
        if index < total then
            local info = syllable_infos[index]
            local separator = "-"
            if info.comma_after then
                separator = ", "
            elseif info.space_after then
                separator = " "
            end
            pieces[#pieces + 1] = separator
        end
    end
    return table.concat(pieces)
end
--- Parse one syllable of user input into its components.
-- Markers are removed first (via get_syllable_markers, then the leading
-- no-assimilation and trailing no-sandhi markers), after which the text is
-- split into form(s) and tone part.
-- @param syllable non-empty string
-- @return table with orig_form, changed_form, tone_part, orig_initial,
--   orig_final, changed_initial, changed_final, orig_tone,
--   manual_sandhi_tone, no_sandhi, no_assimilation, capitalize, space_after,
--   comma_after and manual_buc
-- Raises an error when the syllable is empty or cannot be parsed.
local function split_syllable(syllable)
	if not syllable or syllable == "" then
		error("Invalid syllable: " .. tostring(syllable))
	end

	local markers, body = get_syllable_markers(syllable)
	local capitalize = markers.capitalize
	local space_after = markers.space_after
	local comma_after = markers.comma_after
	local manual_buc = markers.manual_buc

	-- Leading marker: suppress initial assimilation.
	local no_assimilation = body:sub(1, 1) == SPECIAL_MARKERS.NO_ASSIMILATION
	if no_assimilation then
		body = body:sub(2)
	end
	-- Trailing marker: suppress tone sandhi.
	local no_sandhi = body:sub(-1) == SPECIAL_MARKERS.NO_SANDHI
	if no_sandhi then
		body = body:sub(1, -2)
	end

	-- "orig>changed<tone>" carries a manual sound change; otherwise the
	-- changed form starts out equal to the original form.
	local orig_form, changed_form, tone_part
	if body:find(SPECIAL_MARKERS.MANUAL_CHANGE) then
		orig_form, changed_form, tone_part = body:match("(.-)>(.-)([1-7S]+.*)$")
	else
		orig_form, tone_part = body:match("(.-)([1-7S]+.*)$")
		changed_form = orig_form
	end
	if not orig_form or not tone_part then
		error("Invalid syllable format: " .. body)
	end

	local orig_initial, orig_final = split_initial_final({form = orig_form})
	local changed_initial, changed_final = split_initial_final({form = changed_form})

	-- "X-Y" in the tone part means original tone X with manual sandhi tone Y.
	local orig_tone, manual_sandhi_tone
	if tone_part:find("-") then
		orig_tone, manual_sandhi_tone = tone_part:match("^([1-7S]+)%-([1-7S]+)$")
		require("Module:debug").track('cpx-pron/manual sandhi tone')
	else
		orig_tone = tone_part
	end

	-- Tone 3 on a changed final ending in "h" is recorded as "S3".
	if orig_tone == "3" and changed_final:sub(-1) == "h" then
		orig_tone = "S3"
	end
	-- Tones 6 and 7 get an A/B suffix depending on whether the original
	-- final ends in "h".
	if orig_tone == "6" or orig_tone == "7" then
		local suffix = "B"
		if orig_final:sub(-1) == "h" then
			suffix = "A"
		end
		orig_tone = orig_tone .. suffix
	end

	if not (orig_initial and orig_final and orig_tone) then
		error("Unable to parse syllable: " .. body)
	end

	return {
		orig_form = orig_form,
		changed_form = changed_form,
		tone_part = tone_part,
		orig_initial = orig_initial,
		orig_final = orig_final,
		changed_initial = changed_initial,
		changed_final = changed_final,
		orig_tone = orig_tone,
		manual_sandhi_tone = manual_sandhi_tone,
		no_sandhi = no_sandhi,
		no_assimilation = no_assimilation,
		-- BUC (only for Putian)
		capitalize = capitalize,
		space_after = space_after,
		comma_after = comma_after,
		manual_buc = manual_buc
	}
end
--- Turn one raw syllable into the syllable-info record used by later stages.
-- @param options table with `syllable` (raw text) and `is_first_syllable`
-- @return table holding original/changed initial, final and tone plus the
--   marker flags produced by split_syllable
local function create_syllable_info(options)
	local parts = split_syllable(options.syllable)

	local info = {}
	info.original_initial = parts.orig_initial
	info.original_final = parts.orig_final
	info.original_tone = parts.orig_tone
	info.changed_initial = parts.changed_initial
	info.changed_final = parts.changed_final
	-- Until sandhi is applied the changed tone equals the original tone.
	info.changed_tone = parts.orig_tone
	info.no_sandhi = parts.no_sandhi
	info.no_assimilation = parts.no_assimilation
	info.is_first_syllable = options.is_first_syllable
	info.manual_sandhi_tone = parts.manual_sandhi_tone
	-- BUC-related flags
	info.capitalize = parts.capitalize
	info.space_after = parts.space_after
	info.comma_after = parts.comma_after
	info.manual_buc = parts.manual_buc
	return info
end
-- Syllable processing functions
--- Split a word on whitespace and build a syllable-info record per syllable.
-- @param options table with `word` (whitespace-separated syllables)
-- @return list of syllable-info tables, in input order
local function create_syllable_infos(options)
	local infos = {}
	for token in options.word:gmatch("%S+") do
		-- Only the syllable added to a still-empty list counts as first.
		local starts_word = (#infos == 0)
		local info = create_syllable_info({
			syllable = token,
			is_first_syllable = starts_word
		})
		table.insert(infos, info)
	end
	return infos
end
--- Tidy up nasal marking on a syllable after assimilation (mutates it).
-- @param options table with `syllable`, a syllable-info table whose
--   `changed_initial` / `changed_final` are adjusted in place
local function post_process_nasalization(options)
	local target = options.syllable

	-- After an initial that starts with m or n, a trailing "nn" on the
	-- final is dropped.
	if target.changed_initial:match("^[mn]g?") then
		target.changed_final = (target.changed_final:gsub("nn$", ""))
	end

	-- "ng" + "ng" collapses to the bare final "ng".
	local is_double_ng = target.changed_initial == "ng"
		and target.changed_final == "ng"
	if is_double_ng then
		target.changed_initial = ""
	end
end
--- Work out the sandhi tone of a syllable given the syllable that follows.
-- @param options table with `curr_syllable`, `next_syllable` (may be nil)
--   and `dialect`
-- @return the manual sandhi tone if one is set; the original tone for a
--   syllable with no successor; otherwise the sandhi_rules entry for
--   (dialect, current tone, next tone), falling back to the original tone
local function get_sandhi_tone(options)
	local current = options.curr_syllable
	local following = options.next_syllable

	-- A manually specified tone always wins.
	if current.manual_sandhi_tone then
		return current.manual_sandhi_tone
	end

	local base_tone = current.original_tone
	-- Nothing follows, so nothing triggers sandhi.
	if not following then
		return base_tone
	end

	local rule = sandhi_rules[options.dialect][base_tone][following.original_tone]
	if rule then
		return rule
	end
	return base_tone
end
--- Fill in `changed_tone` for every syllable (mutates the syllable infos).
-- A manual sandhi tone of "6", "7" or "3" is first normalised to
-- "6A"/"6B", "7A"/"7B" or "S3" depending on whether the changed final ends
-- in "h". A tracking key is emitted for every syllable that has a successor.
-- @param options table with `dialect` and `syllable_infos`
local function apply_sandhi(options)
	local dialect = options.dialect
	local infos = options.syllable_infos

	local function ends_in_h(info)
		return info.changed_final:sub(-1) == "h"
	end

	-- Tone label used in tracking keys: "S1" and "S3" are kept, any other
	-- "S"-prefixed tone loses the prefix.
	local function tracking_label(tone)
		if tone ~= "S1" and tone ~= "S3" and tone:sub(1, 1) == "S" then
			return tone:sub(2)
		end
		return tone
	end

	for i = 1, #infos do
		local curr = infos[i]
		local following = infos[i + 1]
		local source_tone = curr.original_tone

		local manual = curr.manual_sandhi_tone
		if manual then
			if manual == "6" then
				curr.manual_sandhi_tone = ends_in_h(curr) and "6A" or "6B"
			elseif manual == "7" then
				curr.manual_sandhi_tone = ends_in_h(curr) and "7A" or "7B"
			elseif manual == "3" and ends_in_h(curr) then
				curr.manual_sandhi_tone = "S3"
			end
		end

		-- Sandhi is skipped when the syllable carries the no-sandhi mark,
		-- is followed by a comma, or is the last syllable.
		local keeps_tone = curr.no_sandhi
			or curr.comma_after
			or not following
		if keeps_tone then
			curr.changed_tone = curr.original_tone
		else
			curr.changed_tone = get_sandhi_tone({
				curr_syllable = curr,
				next_syllable = following,
				dialect = dialect
			})
		end

		-- Tone 3 on a final ending in "h" is represented as "S3".
		if curr.changed_tone == '3' and ends_in_h(curr) then
			curr.changed_tone = 'S3'
		end

		if following then
			local from_label = tracking_label(source_tone)
			local next_label = tracking_label(following.original_tone)
			local to_label = tracking_label(curr.changed_tone)
			require("Module:debug").track('cpx-pron/sandhi/' .. dialect .. '/' ..
				from_label .. '+' .. next_label .. '/' ..
				to_label)
		end
	end
end
--- Apply initial assimilation to every non-first syllable (mutates them).
-- The first syllable is passed through and flagged as first. For each later
-- syllable the type of the previous syllable's (changed) final selects an
-- entry in initial_assimilation_rules; a manually changed initial is left
-- untouched and only tracked.
-- @param options table with `dialect` and `syllable_infos`
-- @return list containing the same syllable-info tables, in order
local function apply_initial_assimilation(options)
	local dialect = options.dialect
	local infos = options.syllable_infos
	local processed = {}

	local function track_assimilation(final_type, from_initial, to_initial)
		require("Module:debug").track('cpx-pron/assimilation/' .. dialect .. '/' .. final_type .. '/' .. from_initial .. '/' .. to_initial)
	end

	-- Dialects for which a manual override after a nasal initial is tracked.
	local override_dialects = { xy = true, yy = true, ft = true }

	local first = infos[1]
	processed[1] = first
	first.is_first_syllable = true

	for i = 2, #infos do
		local prev = processed[i - 1]
		local curr = infos[i]
		local source_initial = curr.original_initial

		-- Manual override: previous syllable has a nasal initial and a final
		-- not ending in ng / h / nn, and the initial was changed by hand.
		if override_dialects[dialect]
			and prev.changed_initial:match("^[mn]g?$")
			and not (prev.changed_final:sub(-2) == "ng"
				or prev.changed_final:sub(-1) == "h"
				or prev.changed_final:sub(-2) == "nn")
			and curr.changed_initial ~= curr.original_initial then
			require("Module:debug").track('cpx-pron/xy-nasal-initial/manual-override')
		end

		local prev_final_type = get_final_type({
			initial = prev.changed_initial,
			final = prev.changed_final,
			dialect = dialect
		})

		-- Rules apply only without a no-assimilation mark, without a comma
		-- after the previous syllable, and without a manual initial change.
		local eligible = not curr.no_assimilation
			and not prev.comma_after
			and curr.changed_initial == curr.original_initial

		if eligible then
			local rule_type = prev_final_type
			-- A nasalized final on the current syllable makes certain
			-- initials follow the nasal-final rules instead.
			if rule_type == "other_final"
				and curr.original_initial:match("^[bpdtzcs]")
				and get_final_type({
					initial = curr.original_initial,
					final = curr.original_final,
					dialect = dialect
				}) == "nasalized_final" then
				rule_type = "nasal_final"
			end

			curr.changed_initial =
				initial_assimilation_rules[dialect][rule_type][curr.original_initial] or
				curr.original_initial
			track_assimilation(prev_final_type, source_initial, curr.changed_initial)
		elseif curr.changed_initial ~= source_initial then
			-- No rule ran, but the initial differs: a manual change.
			track_assimilation(prev_final_type, source_initial, curr.changed_initial)
		end

		post_process_nasalization({
			syllable = curr,
			dialect = dialect
		})
		processed[#processed + 1] = curr
	end

	return processed
end
--- Spell out the post-assimilation, post-sandhi pronunciation of a word.
-- (This function used to be defined twice in a row with identical bodies;
-- the second definition shadowed the first, so only one is kept.)
-- @param syllable_infos list of syllable-info tables; each must have string
--   `changed_initial`, `changed_final` and `changed_tone` fields
-- @return the syllables joined by single spaces
local function generate_actual_pronunciation(syllable_infos)
	local pronunciations = {}
	for i, syllable in ipairs(syllable_infos) do
		pronunciations[i] = syllable.changed_initial ..
			syllable.changed_final ..
			syllable.changed_tone
	end
	return table.concat(pronunciations, " ")
end
--- Look up the IPA value of an initial, final or tone for one dialect.
-- @param options table with fields:
--   type          "initials", "finals" or "tones",
--   dialect       dialect code,
--   value         the spelling to look up,
--   syllable_info optional; used only to build a full-syllable suggestion
--                 in the "invalid final" error message.
-- @return the IPA value found in the lookup table
-- Raises an error for missing parameters, an unknown lookup type, the
-- rejected spellings "bh" (initial) and "S5" (tone), and for any value that
-- is not in the table. For finals, the error suggests a corrected spelling
-- when one is known.
local function get_ipa_value(options)
	if not options.type or not options.dialect or not options.value then
		error("Missing required parameter for IPA lookup")
	end

	local lookup_tables = {
		initials = initials,
		finals = finals,
		tones = tones
	}
	-- Deliberately not named `table`, which would shadow the standard library.
	local lookup = lookup_tables[options.type]
	if not lookup then
		error("Invalid lookup type: " .. options.type)
	end

	if options.type == "initials" and options.value == "bh" then
		error(string.format(
			'Invalid initial "bh" for %s dialect. Please use "w" instead.',
			options.dialect
		))
	end
	if options.type == "tones" and options.value == "S5" then
		error('Invalid tone S5. Please use "6" instead.')
	end

	-- Nasalization is analysed for every final, found or not, because the
	-- deprecated spelling is tracked even when the lookup succeeds.
	local base_final, has_old_nasal, has_new_nasal
	if options.type == "finals" then
		base_final, has_old_nasal, has_new_nasal = handle_nasalization(options.value)
		if has_old_nasal then
			require("Module:debug").track('cpx-pron/deprecated-nasalization')
		end
	end

	local dialect_entries = lookup[options.dialect]
	local result = dialect_entries and dialect_entries[options.value]
	if result then
		return result
	end

	-- Unknown final: try to suggest a corrected spelling.
	if options.type == "finals" then
		local old_nasal_only = has_old_nasal and not has_new_nasal
		local corrected_final = nil
		if corrections.common[base_final] then
			corrected_final = corrections.common[base_final]
		elseif corrections[options.dialect] and corrections[options.dialect][base_final] then
			corrected_final = corrections[options.dialect][base_final]
		elseif old_nasal_only then
			corrected_final = base_final
		end

		if corrected_final or old_nasal_only then
			local corrected_value = corrected_final or base_final
			if has_old_nasal or has_new_nasal then
				corrected_value = corrected_value .. "nn"
			end

			-- Prefer suggesting the whole syllable (without the internal
			-- A/B tone suffix) when the syllable is known.
			local suggestion = corrected_value
			if options.syllable_info then
				local full_syllable = options.syllable_info.original_initial ..
					corrected_value ..
					options.syllable_info.original_tone
				if full_syllable ~= "" then
					suggestion = (full_syllable:gsub("[AB]", ""))
				end
			end

			error(string.format(
				'Invalid final "%s" for %s dialect. Please use "%s" instead.',
				options.value,
				options.dialect,
				suggestion
			))
		end
	end

	error(string.format(
		"Invalid %s %s for %s.",
		options.type:sub(1, -2),
		options.value,
		options.dialect
	))
end
--- Collect the IPA initial, final and tone for one syllable.
-- When the changed tone differs from the original tone, the tone component
-- becomes "<original>⁻<changed>".
-- @param options table with `syllable_info` and `dialect`
-- @return table with `initial`, `final` and `tone` strings
local function get_ipa_components(options)
	local info = options.syllable_info
	local dialect = options.dialect

	-- Thin wrapper so each lookup reads as one line.
	local function lookup(kind, value, attach_info)
		return get_ipa_value({
			type = kind,
			dialect = dialect,
			value = value,
			syllable_info = attach_info and info or nil
		})
	end

	local components = {}
	components.initial = lookup("initials", info.changed_initial, true)
	components.final = lookup("finals", info.changed_final, true)
	components.tone = lookup("tones", info.original_tone, true)

	if info.changed_tone ~= info.original_tone then
		-- The sandhi-tone lookup is made without the syllable info.
		local sandhi_tone = lookup("tones", info.changed_tone, false)
		if not sandhi_tone then
			error("Invalid sandhi tone: " .. info.changed_tone ..
				" for dialect: " .. dialect)
		end
		components.tone = components.tone .. "⁻" .. sandhi_tone
	end

	return components
end
--- Produce the superscript that shows a syllable's pre-assimilation initial.
-- @param options table with `syllable_info` and `dialect`
-- @return "" for a first syllable or an unchanged initial; otherwise
--   "<sup>(X-)</sup>" where X is the IPA of the original initial, or "Ø"
--   when the original initial is empty
local function get_original_initial_display(options)
	local info = options.syllable_info

	local initial_unchanged = (info.original_initial == info.changed_initial)
	if info.is_first_syllable or initial_unchanged then
		return ""
	end

	-- The lookup runs even for an empty initial, so an invalid original
	-- initial still raises here.
	local ipa_initial = get_ipa_value({
		type = "initials",
		dialect = options.dialect,
		value = info.original_initial,
		syllable_info = info
	})

	if info.original_initial == "" then
		return "<sup>(Ø-)</sup>"
	end
	return "<sup>(" .. ipa_initial .. "-)</sup>"
end
--- Render one syllable as IPA text.
-- @param options table with `syllable_info` and `dialect`
-- @return original-initial marker (possibly empty) followed by the IPA
--   initial, final and tone
local function syllable_to_ipa(options)
	local request = {
		syllable_info = options.syllable_info,
		dialect = options.dialect
	}

	-- Components are resolved first, then the original-initial marker.
	local parts = get_ipa_components({
		syllable_info = request.syllable_info,
		dialect = request.dialect
	})
	local prefix = get_original_initial_display({
		syllable_info = request.syllable_info,
		dialect = request.dialect
	})

	return prefix .. parts.initial .. parts.final .. parts.tone
end
-- Generate IPA for the syllables
--- Render a list of syllables as a space-separated IPA string.
-- @param options table with `syllable_infos` (required) and `dialect`
-- @return IPA of each syllable, joined with single spaces
local function generate_ipa(options)
	if not options or not options.syllable_infos then
		error("Missing required syllable_infos in generate_ipa")
	end

	local dialect = options.dialect
	local rendered = {}
	for _, info in ipairs(options.syllable_infos) do
		local ipa = syllable_to_ipa({
			syllable_info = info,
			dialect = dialect
		})
		table.insert(rendered, ipa)
	end
	return table.concat(rendered, " ")
end
-- Process a single pronunciation entry
--- Process one "dialects:word" entry for each of its dialects.
-- The word is parsed once; every dialect then works on its own deep copy so
-- that assimilation and sandhi for one dialect cannot affect another.
-- @param options table with `dialect_codes`, `word` and `index`
-- @return table with `dialect_codes`, `word`, `index` and `processed`, a
--   list holding one result table per dialect (dialect, dialect_position,
--   original, actual, ipa, index, syllable_infos, and buc for "pt")
local function process_pronunciation(options)
	local word = options.word
	local entry = {
		dialect_codes = options.dialect_codes,
		word = word,
		processed = {},
		index = options.index
	}

	local dialect_list = split_dialect_codes(options.dialect_codes)
	local base_infos = create_syllable_infos({
		word = word,
		is_first_syllable = true
	})

	for position, dialect in ipairs(dialect_list) do
		local working_copy = m_table.deepCopy(base_infos)

		-- Phonological rules: assimilation first, then tone sandhi.
		local syllables = apply_initial_assimilation({
			dialect = dialect,
			syllable_infos = working_copy
		})
		apply_sandhi({
			dialect = dialect,
			syllable_infos = syllables
		})

		local item = {
			dialect = dialect,
			dialect_position = position,
			original = word,
			index = options.index,
			syllable_infos = syllables
		}
		item.actual = generate_actual_pronunciation(syllables)
		item.ipa = generate_ipa({
			syllable_infos = syllables,
			dialect = dialect
		})

		-- BUC is only generated for Putian.
		if dialect == "pt" then
			item.buc = generate_buc({
				syllable_infos = syllables,
				dialect = dialect,
				word = word
			})
		end

		table.insert(entry.processed, item)
	end

	return entry
end
-- Formatting helper functions
--- Wrap text in the monospace span used for romanizations.
-- Styling comes from the "zhpron-monospace" CSS class, not an inline
-- font-family.
-- @param text string to wrap
-- @return HTML span string
local function font_consolas(text)
	local open_tag = '<span class="zhpron-monospace">'
	local close_tag = '</span>'
	return open_tag .. text .. close_tag
end
--- Wrap an IPA transcription in slashes inside an "IPA"-class span.
-- @param text IPA string without delimiters
-- @return HTML span string
local function font_ipa(text)
	local delimited = "/" .. text .. "/"
	return '<span class="IPA">' .. delimited .. '</span>'
end
--- Strip input markup from a romanization and superscript its tone digits.
-- @param text romanization string, or nil
-- @return exactly one value: the cleaned string ("" for nil input).
--   Previously the substitution count of the last gsub leaked out as a
--   second return value, which is unsafe in argument lists such as
--   table.insert(t, clear_pinging_format(x)).
local function clear_pinging_format(text)
	if not text then
		return ""
	end
	-- Assigning to a single local keeps only the string result of the chain.
	local cleaned = text:gsub("%-S?%d", "") -- remove tone sandhi
		:gsub(">[a-zⁿ]+", "") -- remove irregular sound change
		:gsub("[#*^\\]+", "") -- remove special symbols
		:gsub("{[^}]+}", "") -- remove manual BUC
		:gsub("[AB]", "") -- remove A/B
		:gsub("(%d)", "<sup>%1</sup>") -- superscript tone numbers
		:gsub("S", "") -- remove "S" in special tones
	return cleaned
end
-- Output formatting functions
--- Format results as "original → actual<br/>/IPA/" demo lines.
-- @param options table with `results` and optional `type`: "assim" bolds
--   the non-empty initials of non-first syllables, "sandhi" bolds the tones
--   of non-last syllables
-- @return one line per processed dialect result, separated by blank lines
local function format_demo_output(options)
	local highlight_type = options.type or "default"
	local lines = {}

	-- Render one syllable side (original or changed) as wikitext.
	local function render(initial, final, tone, mark_initial, mark_tone)
		local shown_tone = tone:gsub("S", "")
		local text
		if mark_initial and initial ~= "" then
			text = "'''" .. initial .. "'''" .. final
		else
			text = initial .. final
			if mark_tone then
				shown_tone = "'''" .. shown_tone .. "'''"
			end
		end
		-- Superscript the digit, then drop the internal A/B suffix.
		local tone_markup = shown_tone:gsub("(%d)", "<sup>%1</sup>"):gsub("[AB]", "")
		return text .. tone_markup
	end

	for _, result in ipairs(options.results) do
		for _, processed in ipairs(result.processed) do
			local infos = processed.syllable_infos
			local last_index = #infos
			local before, after = {}, {}

			for i, info in ipairs(infos) do
				local mark_initial = (highlight_type == "assim") and (i ~= 1)
				local mark_tone = (highlight_type == "sandhi") and (i ~= last_index)

				table.insert(before, render(
					info.original_initial,
					info.original_final,
					info.original_tone,
					mark_initial,
					mark_tone
				))
				table.insert(after, render(
					info.changed_initial,
					info.changed_final,
					info.changed_tone,
					mark_initial,
					mark_tone
				))
			end

			local line = table.concat(before, " ")
				.. " → " .. table.concat(after, " ")
				.. "<br/>" .. font_ipa(processed.ipa)
			table.insert(lines, line)
		end
	end

	return table.concat(lines, "\n\n")
end
--- Format results as the one-line brief display.
-- Each distinct cleaned romanization is listed once, in first-seen order.
-- The dialect name is shown only when exactly one dialect occurs across all
-- results.
-- @param options table with `results`
-- @return wikitext string
local function format_brief_output(options)
	local seen = {}
	local ordered = {}
	local known_dialects = {}
	local dialect_count = 0
	local only_dialect = nil

	-- Collect pronunciations and dialect codes in their original order.
	for _, result in ipairs(options.results) do
		local processed_list = result.processed
		if processed_list and #processed_list > 0 then
			local cleared = clear_pinging_format(processed_list[1].original)
			if not seen[cleared] then
				seen[cleared] = true
				table.insert(ordered, cleared)
			end

			for _, processed in ipairs(processed_list) do
				local code = processed.dialect
				if not known_dialects[code] then
					known_dialects[code] = true
					dialect_count = dialect_count + 1
					only_dialect = code
				end
			end
		end
	end

	-- "Puxian Min" is already written by zh-pron, so start with a space.
	local pieces = { " ", "<small>(<i>" }
	if dialect_count == 1 then
		table.insert(pieces, dialects[only_dialect] .. ", ")
	end
	table.insert(pieces, "[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]</i>): </small>")

	if #ordered > 0 then
		table.insert(pieces, font_consolas(table.concat(ordered, " / ")))
	end

	return table.concat(pieces)
end
--- Format results as the full nested-list display.
-- Declared `local` so it no longer creates a global; it is only referenced
-- later in this file.
-- Results are ordered by `index`. Within a result, dialects with identical
-- original/actual/IPA output share one entry, listed by dialect position.
-- @param options table with `results`
-- @return wikitext string ("" when there are no results)
local function format_complete_output(options)
	local results = options.results
	local output = {}

	-- Sort a copy so the caller's list is left untouched.
	local ordered_results = {}
	for _, result in ipairs(results) do
		table.insert(ordered_results, result)
	end
	table.sort(ordered_results, function(a, b)
		return a.index < b.index
	end)

	for _, result in ipairs(ordered_results) do
		local grouped = {}
		local group_keys = {}

		for _, processed in ipairs(result.processed) do
			local key = processed.original .. "|" .. processed.actual .. "|" .. processed.ipa
			local group = grouped[key]
			if not group then
				group = {
					data = {
						original = processed.original,
						actual = processed.actual,
						ipa = processed.ipa,
						buc = processed.buc,
						dialect = processed.dialect
					},
					dialects = {}
				}
				grouped[key] = group
				table.insert(group_keys, key)
			elseif not group.data.buc and processed.buc then
				-- A later dialect in the group supplies the BUC.
				group.data.buc = processed.buc
				group.data.dialect = processed.dialect
			end
			table.insert(group.dialects, {
				code = processed.dialect,
				position = processed.dialect_position
			})
		end

		for _, key in ipairs(group_keys) do
			local group = grouped[key]
			table.sort(group.dialects, function(a, b)
				return a.position < b.position
			end)

			local dialect_names = {}
			for _, dialect_info in ipairs(group.dialects) do
				table.insert(dialect_names, dialects[dialect_info.code])
			end
			table.insert(output, "\n** <small>(<i>" .. table.concat(dialect_names, ", ") .. "</i>)</small>")

			-- Pouseng Ping'ing; single-target assignment keeps only the
			-- string result of clear_pinging_format.
			local original_text = clear_pinging_format(group.data.original)
			local actual_text = clear_pinging_format(group.data.actual)
			table.insert(output, "\n*** <small><i>[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]</i></small>: " ..
				font_consolas(original_text))
			if original_text ~= actual_text then
				table.insert(output, font_consolas(
					" [<small>Phonetic</small>: " .. actual_text) ..
					"]")
			end

			-- BUC
			if group.data.dialect == "pt" and group.data.buc then
				local displayed_buc = group.data.buc:gsub("%*", "")
				table.insert(output, "\n*** <small><i>[[w:Hinghwa Romanized|Báⁿ-uā-ci̍]]</i></small>: " ..
					font_consolas(displayed_buc))
			end

			-- IPA
			table.insert(output, '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]] ' ..
				'<sup>([[w:Pu–Xian Min|key]])</sup></small>: ' .. font_ipa(group.data.ipa))
		end
	end

	return table.concat(output)
end
-- Main entry point
--- Main entry point: render "dialects:word/dialects:word/..." input.
-- May be called directly, or with a frame-like table whose `args[1]` is the
-- text, `args[2]` the mode and `args.type` the highlight type.
-- @param text input string (or frame-like table)
-- @param mode one of the FORMAT_MODES values; defaults to FORMAT_MODES.BRIEF
-- @param highlight_type highlight type for demo mode; defaults to "default"
-- @return formatted wikitext
-- Raises an error on empty input, a malformed entry or an unsupported mode.
function export.rom_display(text, mode, highlight_type)
	if type(text) == "table" then
		local args = text.args
		highlight_type = args.type
		mode = args[2] or mode
		text = args[1]
	end

	if not text or text == "" then
		error("Invalid input: text must be a non-empty string")
	end
	mode = mode or FORMAT_MODES.BRIEF
	highlight_type = highlight_type or "default"

	local pronunciation_data = {
		results = {},
		mode = mode,
		type = highlight_type
	}

	-- Entries are separated by "/"; each is "<dialect codes>:<word>".
	local position = 0
	for entry in text:gmatch("[^/]+") do
		local dialect_codes, word = entry:match("^(.+):(.+)$")
		if not dialect_codes or not word then
			error("Invalid input format: " .. entry)
		end
		position = position + 1
		local processed = process_pronunciation({
			dialect_codes = dialect_codes,
			word = word,
			index = position
		})
		table.insert(pronunciation_data.results, processed)
	end

	-- Pick the formatter for the requested mode.
	local formatters = {
		[FORMAT_MODES.BRIEF] = format_brief_output,
		[FORMAT_MODES.COMPLETE] = format_complete_output,
		[FORMAT_MODES.DEMO] = format_demo_output
	}
	local formatter = formatters[mode]
	if not formatter then
		error("Unsupported mode: " .. mode)
	end
	return formatter(pronunciation_data)
end
-- Convert single BUC syllable to PSP
-- Convert one BUC syllable to its PSP spelling.
-- `input` is either the syllable string itself or a table whose `args[1]`
-- holds it. nil or empty input is returned unchanged. When the syllable
-- cannot be converted (the final is not in the table below, or an error is
-- raised during conversion) the original syllable is returned unchanged.
local function syllable_to_psp(input)
-- BUC initial -> PSP initial
local buc_to_psp_initials = {
["b"] = "b", ["ch"] = "c", ["c"] = "z",
["d"] = "d", ["g"] = "g", ["h"] = "h",
["k"] = "k", ["l"] = "l", ["m"] = "m",
["ng"] = "ng", ["n"] = "n", ["p"] = "p",
["s"] = "s", ["t"] = "t", [""] = ""
}
-- BUC final (tone marks already removed) -> PSP final
-- NOTE(review): the keys ending in "h*" look unused, because a trailing
-- "h*" is stripped from the final before this table is consulted -- confirm.
local buc_to_psp_finals = {
["a"] = "a",
["aⁿ"] = "a",
["ah"] = "ah",
["ah*"] = "a",
["ai"] = "ai",
["ang"] = "ang",
["au"] = "ao",
["a̤"] = "e",
["a̤ⁿ"] = "e",
["a̤h"] = "eh",
["a̤h*"] = "e",
["e"] = "ae",
["eh"] = "eh",
["eng"] = "eng",
["e̤"] = "oe",
["e̤ⁿ"] = "oe",
["e̤h"] = "oeh",
["e̤ng"] = "oeng",
["i"] = "i",
["ih"] = "ih",
["ih*"] = "i",
["ing"] = "ing",
["ia"] = "ia",
["iaⁿ"] = "ia",
["iah"] = "iah",
["iah*"] = "ia",
["iang"] = "ieng",
["iu"] = "iu",
["o"] = "ou",
["o̤"] = "or",
["o̤ⁿ"] = "or",
["o̤h"] = "orh",
["o̤h*"] = "or",
["o̤ng"] = "orng",
["eo"] = "o",
["eoh"] = "oh",
["eoh*"] = "o",
["eong"] = "ong",
["u"] = "u",
["uh"] = "uh",
["ua"] = "ua",
["uaⁿ"] = "ua",
["uah"] = "uah",
["uah*"] = "ua",
["uang"] = "uang",
["ui"] = "ui",
["uai"] = "uei",
["oi"] = "uei",
["oiⁿ"] = "uei",
["oih"] = "uei",
["oih*"] = "uei",
["ṳ"] = "y",
["ṳh"] = "yh",
["ṳng"] = "yng",
["io̤"] = "yor",
["io̤ⁿ"] = "yor",
["io̤h"] = "yorh",
["io̤h*"] = "yor",
["io̤ng"] = "yorng",
["ng"] = "ng",
["a̤u"] = "ieo",
["a̤uⁿ"] = "ieo",
["a̤uh"] = "ieoh",
["a̤uh*"] = "ieo"
}
-- Handle input parameter
local syllable
if type(input) == "table" then
syllable = input.args[1]
else
syllable = input
end
if not syllable or syllable == "" then
return syllable
end
-- Try to convert the syllable, return original if any error occurs
local success, result = pcall(function()
-- Decompose the syllable and check for validity
-- (decomposition exposes the tone diacritics as separate combining marks)
local decomposed = mw.ustring.toNFD(syllable)
if not decomposed then
return syllable
end
-- Extract and remove tone marks
-- The first mark found, in the order tested below, decides the tone.
local tone = ""
if decomposed:find("́") then -- Tone 2: COMBINING ACUTE ACCENT
tone = "2"
decomposed = decomposed:gsub("́", "")
elseif decomposed:find("̂") then -- Tone 3: COMBINING CIRCUMFLEX ACCENT
tone = "3"
decomposed = decomposed:gsub("̂", "")
elseif decomposed:find("̍") then -- Tone 4/7: COMBINING VERTICAL LINE ABOVE
if decomposed:find("h%*$") then -- Special case: -h* ending -> tone 2
-- NOTE(review): this branch is only entered after the vertical line was
-- found just above, so the check below looks unreachable -- confirm.
if not decomposed:find("̍") then -- If has h* but no vertical line
return syllable
end
tone = "2"
elseif decomposed:find("h$") then
tone = "7"
else
tone = "4"
end
decomposed = decomposed:gsub("̍", "")
elseif decomposed:find("̄") then -- Tone 5: COMBINING MACRON
tone = "5"
decomposed = decomposed:gsub("̄", "")
else
-- No tone mark: either tone 1 (no -h) or tone 6 (with -h)
if decomposed:find("h$") and not decomposed:find("h%*$") then
tone = "6"
else
tone = "1"
end
end
-- Recompose and check validity
local normalized = mw.ustring.toNFC(decomposed)
if not normalized then
return syllable
end
-- Special case: standalone `ng` syllable after tone removal
if normalized == "ng" then
return "ng" .. tone
end
-- Extract initial
-- Two-letter initials (ch, ng) are tested before single letters.
local initial = ""
if normalized:match("^[Cc][Hh]") then
initial = normalized:sub(1, 2):lower()
normalized = normalized:sub(3)
elseif normalized:match("^[Nn][Gg]") then
initial = normalized:sub(1, 2):lower()
normalized = normalized:sub(3)
elseif normalized:match("^[BbCcDdFfGgHhKkLlMmNnPpSsTt]") then
initial = normalized:sub(1, 1):lower()
normalized = normalized:sub(2)
end
-- NOTE(review): "f" is accepted by the class above but has no entry in
-- buc_to_psp_initials, so it falls back to an empty initial here -- confirm.
local psp_initial = buc_to_psp_initials[initial] or ""
-- Process final
-- Remove -h* marker if present (affects tone but not final lookup)
local final = normalized:gsub("h%*$", "")
-- Look up PSP final
local psp_final = buc_to_psp_finals[final]
if not psp_final then
return syllable
end
-- Combine all parts to form complete PSP syllable
return (psp_initial .. psp_final .. tone):lower()
end)
-- Return original syllable if conversion failed
return success and result or syllable
end
-- Convert BUC to PSP (both single syllable and text)
--- Convert BUC text (one syllable or running text) to PSP.
-- The text is split on whitespace, hyphens and ASCII/CJK punctuation. The
-- delimiters are kept as they are and every chunk between them is converted
-- with syllable_to_psp.
--
-- Chunks are converted while splitting, so no second pass is needed to tell
-- delimiters from syllables. The earlier second pass used byte-based
-- string.match with a character class containing multi-byte characters,
-- which cannot match a full-width delimiter as a single character.
-- @param input the text, or a frame-like table whose `args[1]` is the text
-- @return converted text; nil or "" is returned unchanged
function export.buc_to_psp(input)
	local text
	if type(input) == "table" then
		text = input.args[1]
	else
		text = input
	end
	if not text or text == "" then
		return text
	end

	local delimiter_pattern = "[%s%-%.,;:!%?,。;:!?「」『』、]"
	local parts = {}
	local last_pos = 1

	-- The "()" capture gives the position of each delimiter; it is used with
	-- mw.ustring.sub / mw.ustring.len throughout.
	for pos, delimiter in mw.ustring.gmatch(text, "()(" .. delimiter_pattern .. ")") do
		if pos > last_pos then
			parts[#parts + 1] = syllable_to_psp(mw.ustring.sub(text, last_pos, pos - 1))
		end
		parts[#parts + 1] = delimiter
		last_pos = pos + mw.ustring.len(delimiter)
	end

	-- Trailing chunk after the last delimiter.
	if last_pos <= mw.ustring.len(text) then
		parts[#parts + 1] = syllable_to_psp(mw.ustring.sub(text, last_pos))
	end

	return table.concat(parts)
end
return export