বিষয়বস্তুতে চলুন

মডিউল:zh-translit

উইকিঅভিধান, মুক্ত অভিধান থেকে

এই মডিউলের জন্য মডিউল:zh-translit/নথি-এ নথিপত্র তৈরি করা হয়ে থাকতে পারে

local m_str_utils = require("Module:string utilities")
local m_utils = require("Module:utilities")

local findTemplates = require("Module:template parser").findTemplates
local get_section = m_utils.get_section
local gsub = string.gsub
local insert = table.insert
local safe_require = m_utils.safe_require
local sub = string.sub
local toNFD = mw.ustring.toNFD
local trim = mw.text.trim
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local uupper = m_str_utils.upper

-- Frame of the current invocation; iterate_content uses it to expand wikitext
-- through frame:preprocess().
local frame = mw.getCurrentFrame()
-- Filled in on first use by export.tr with the MT table of
-- Module:zh/data/cmn-tag; nil until then.
local tag

-- Lookup keyed by language code; iterate_content compares its values against
-- the (preprocessed) parameter names of {{zh-pron}}.
local lect_code = mw.loadData("Module:zh/data/lect codes").langcode_to_abbr

-- Table of functions this module exposes.
local export = {}

--- Flag a term as needing a manual transliteration and report failure.
-- Sends the key "zh-translit/needs manual translit/<lang>" to
-- Module:debug/track.
-- @param lang language code appended to the tracking key
-- @param request accepted but never read
-- @return always nil
local function fail(lang, request)
	local track = require("Module:debug/track")
	local tracking_key = "zh-translit/needs manual translit/" .. lang
	track(tracking_key)
	return nil
end

--- Fetch the level-2 "Chinese" section of a page's wikitext.
-- @param title page title to look up
-- @return whatever get_section returns for the page's wikitext, or false when
--   no title object can be created or the page has no content
local function get_content(title)
	local page = mw.title.new(title)
	if not page then
		return false
	end
	-- getContent() yields nil for a page that does not exist; stop here rather
	-- than hand nil to get_section (whose handling of nil is not visible here).
	local wikitext = page:getContent()
	if not wikitext then
		return false
	end
	return get_section(wikitext, "Chinese", 2)
end

-- Lects in which a comma with no space after it also ends a segment.
local comma_delimited = {
	["cmn"] = true,
	["csp"] = true,
	["wuu"] = true,
	["yue"] = true,
	["zhx-tai"] = true,
}

--- Return the segment of `readings` that ends just before byte position `i`,
-- if position `i` closes a segment.
-- A segment is closed by reaching `i_end`, by a "/", or (for the lects listed
-- in comma_delimited) by a "," that is not followed by a space.
-- @param readings string being scanned
-- @param lang language code
-- @param i byte position under inspection
-- @param i_end position one past the last byte of `readings`
-- @param start byte position where the current segment begins
-- @return the segment from `start` to `i - 1`, or nothing when `i` does not
--   close a segment
local function get_reading(readings, lang, i, i_end, start)
	if i == i_end then
		return sub(readings, start, i - 1)
	end
	local ch = sub(readings, i, i)
	if ch == "/" then
		return sub(readings, start, i - 1)
	end
	if ch == "," and comma_delimited[lang] and sub(readings, i + 1, i + 1) ~= " " then
		return sub(readings, start, i - 1)
	end
end

--- Fold the readings of one {{zh-pron}} parameter into the running
-- transliteration.
-- For "ltc" and "och" the whole string is the reading: it is returned as is,
-- or false when an earlier `tr` disagrees with it. For every other lect the
-- string is scanned byte by byte and cut into segments by get_reading; each
-- segment without "=" must agree with `tr` (ignoring case and "^" markers),
-- otherwise the readings are ambiguous and false is returned.
-- @param readings preprocessed value of the lect's {{zh-pron}} parameter
-- @param lang language code
-- @param tr transliteration collected so far, or nil
-- @return the updated transliteration, nil if none was found, or false when
--   the readings conflict
local function handle_readings(readings, lang, tr)
	if lang == "ltc" or lang == "och" then
		if tr and readings ~= tr then
			return false
		end
		return readings
	end
	local tr_orig, i, start, i_end, reading = tr, 1, 1, #readings + 1
	while i <= i_end do
		reading = get_reading(readings, lang, i, i_end, start)
		if not reading then
			-- Position i does not close a segment; keep scanning.
		elseif not reading:find("=") then
			if (
				not tr or
				tr == reading or
				gsub(ulower(tr), "%^", "") == reading
			) then
				tr = reading
			elseif ulower(reading) ~= tr then
				return false
			end
			-- NOTE(review): the segment start only advances after a plain
			-- reading, so once a "key=value" segment has been met every later
			-- segment still begins with it. Confirm this is intended.
			start = i + 1
		elseif tr and lang == "cmn" and reading == "cap=y" then
			-- Guarded on tr: with no reading collected yet there is nothing to
			-- capitalise, and concatenating nil would raise an error.
			local tr_cap = "^" .. tr
			if not tr_orig or tr_orig == tr_cap then
				tr = tr_cap
			end
		end
		i = i + 1
	end
	return tr
end

--- Scan one page section for {{zh-pron}} and {{zh-see}} templates.
-- Every {{zh-pron}} with a non-empty parameter whose (preprocessed) name equals
-- lect_code[lang] has that parameter merged into `tr` by handle_readings.
-- Every {{zh-see}} target not yet recorded in `seen` is appended to `see`.
-- @param content section text handed to findTemplates
-- @param lang language code
-- @param see array of page titles still to be visited; appended to in place
-- @param seen set of page titles already visited or queued; updated in place
-- @param tr transliteration collected so far, or nil
-- @return the updated transliteration, nil if none was found, or false as
--   soon as handle_readings reports conflicting readings
local function iterate_content(content, lang, see, seen, tr)
	for template, args in findTemplates(content) do
		if template == "zh-pron" then
			for k, v in pairs(args) do
				if (
					#v > 0 and
					type(k) == "string" and
					frame:preprocess(k) == lect_code[lang]
				) then
					tr = handle_readings(frame:preprocess(v), lang, tr)
					break
				end
			end
			if tr == false then
				return tr
			end
		-- A {{zh-see}} without a first argument names no page, so skip it
		-- instead of passing nil to frame:preprocess.
		elseif template == "zh-see" and args[1] then
			local arg = trim(frame:preprocess(args[1]))
			if not seen[arg] then
				-- Mark the target when it is queued, not when it is later
				-- processed, so repeated references cannot queue (and re-fetch)
				-- the same page more than once.
				seen[arg] = true
				insert(see, arg)
			end
		end
	end
	return tr
end

--- Transliterate a Chinese term by reading the pronunciation off its entry.
-- The "Chinese" section of the page titled `text` is scanned for the reading
-- that {{zh-pron}} gives for `lang`; if the page itself gives none, the pages
-- named by its {{zh-see}} templates are scanned in turn. The reading found is
-- then formatted according to the lect.
-- @param text term to transliterate; also the title of the page looked up
-- @param lang language code; "zh" and "lzh" are handled as "cmn"
-- @param sc not read by this function
-- @return the transliteration followed by one space; `text` itself when it is
--   nil or empty; nil (through fail) when no transliteration can be produced
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end
	
	if lang == "zh" or lang == "lzh" then
		lang = "cmn"
	end
	
	-- A code with no entry in lect_code is swapped for the full code that
	-- Module:languages reports for it.
	if not lect_code[lang] then
		lang = require("Module:languages").getByCode(lang, nil, true):getFullCode()
	end
	
	local content = get_content(text)
	if not content then
		return fail(lang)
	end
	
	-- `see` queues {{zh-see}} targets; `seen` starts with the term itself so a
	-- page pointing back at it is not queued.
	local see = {}
	local seen = {
		[text] = true
	}
	local tr = iterate_content(content, lang, see, seen)
	
	-- Nothing found on the page itself: walk the queued targets. The bound is
	-- re-evaluated each round because iterate_content may append to `see`.
	if tr == nil then
		local i, title = 1
		while i <= #see do
			title = see[i]
			content = get_content(title)
			if content then
				tr = iterate_content(content, lang, see, seen, tr)
				if tr == false then
					return fail(lang)
				end
				seen[title] = true
			end
			i = i + 1
		end
	end
	
	-- Covers both "no reading found" (nil) and "conflicting readings" (false).
	if not tr then
		return fail(lang)
	end
	
	if lang == "cmn" then
		-- Strip every "#" from the reading.
		tr = tr:gsub("#", "")
		-- Only do the per-character pass when a byte in the range of UTF-8
		-- multibyte lead bytes is present.
		if tr:match("[\194-\244]") then
			tag = tag or mw.loadData("Module:zh/data/cmn-tag").MT
			-- Visit one UTF-8 character at a time (a byte plus any continuation
			-- bytes). "一" and "不" get fixed readings; any other character found
			-- in `tag` is replaced by its first entry in NFD form, with an
			-- apostrophe put before an initial a/e/i/o/u. Characters for which
			-- the callback returns nothing are left unchanged by gsub.
			tr = tr:gsub(".[\128-\191]*", function(m)
				if m == "一" then
					return "yī"
				elseif m == "不" then
					return "bù"
				else
					m = tag[m] and tag[m][1]
					if m then
						return toNFD(m):gsub("^[aeiou]", "'%0")
					end
				end
			end)
				:gsub("^'", "") --remove initial apostrophe inserted by previous function
		end
		-- Replace each "^" plus following character with what uupper returns
		-- for that character (the "^" marker comes from "cap=y" handling).
		tr = ugsub(tr, "%^(.)", uupper)
	elseif lang == "csp" or lang == "yue" or lang == "zhx-tai" then
		-- Wrap each run that starts with a digit and continues with digits,
		-- "*" or "-" in <sup> tags (presumably tone numbers; confirm).
		tr = tr:gsub("%d[%d%*%-]*%f[^%d%*]", "<sup>%0</sup>")
	elseif lang == "hak" then
		-- TODO
	elseif lang == "ltc" or lang == "och" then
		-- For these lects `tr` holds per-character selectors, not a reading.
		if tr == "n" then
			return fail(lang)
		end
		local index = {}
		if tr then
			-- Selectors are comma-separated for "ltc", semicolon-separated
			-- for "och".
			if lang == "ltc" then
				index = mw.text.split(tr, ",")
			else
				index = mw.text.split(tr, ";")
			end
		end
		for i = 1, ulen(text) do
			-- Per-character data lives under "ltc-pron/" or "och-pron-ZS/".
			local module_type = lang .. "-pron"
			if lang == "och" then
				module_type = module_type .. "-ZS"
			end
			
			local data_module = safe_require("Module:zh/data/" .. module_type .. "/" .. usub(text, i, i))
			
			-- Give up when there is no data for the character, or when several
			-- entries exist and the selector is absent or just "y".
			if not data_module or (((not index[i]) or index[i] == "y") and #data_module > 1) then
				return fail(lang)
			end
			
			if index[i] == "y" then
				index[i] = 1
			elseif index[i] then
				index[i] = tonumber(index[i])
			end
			
			-- Pick the selected entry, falling back to the first one when the
			-- selector is missing or selects nothing.
			index[i] = index[i] and data_module[index[i]] or data_module[1]
			
			if lang == "ltc" then
				-- Build the form from the "Zhengzhang" initial/final conversion
				-- tables, with a non-empty tone wrapped in <sup>.
				local data = mw.loadData("Module:ltc-pron/data")
				local initial, final, tone = require("Module:ltc-pron").infer_categories(index[i])
				tone = tone ~= "" and ("<sup>" .. tone .. "</sup>") or tone
				index[i] = data.initialConv["Zhengzhang"][initial] .. data.finalConv["Zhengzhang"][final] .. tone
			else
				-- For "och" the sixth field of the entry is used as the form.
				index[i] = index[i][6]
			end
		end
		tr = table.concat(index, " ")
		if lang == "och" then
			tr = "*" .. tr
		end
	elseif lang == "nan" then
		-- TODO
	elseif lang == "nan-tws" then
		tr = require("Module:nan-pron").pengim_display(tr)
	elseif lang == "wuu" then
		local w_pron = require("Module:wuu-pron")
		if tr:match(';') then
			--TODO
			return fail(lang)
		elseif tr:match(':') then
			-- A reading containing ":" has its first three bytes dropped before
			-- formatting (presumably a location prefix; confirm).
			tr = w_pron.wugniu_format(tr:sub(4))
		else
			tr = w_pron.wugniu_format(w_pron.wikt_to_wugniu(tr))
		end
	elseif lang == "zhx-sic" then
		-- Put a space between a digit or "-" and a following letter, then wrap
		-- digit runs in <sup> as for the lects above.
		tr = ugsub(tr, "([%d-])(%a)", "%1 %2")
			:gsub("%d[%d%*%-]*%f[^%d%*]", "<sup>%0</sup>")
	else
		-- Any other lect: defer to the rom function of "Module:<lang>-pron".
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end
	
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return tr .. " "
end

-- Module result: the table holding export.tr.
return export