বিষয়বস্তুতে চলুন

মডিউল:sa-translit

উইকিঅভিধান, মুক্ত অভিধান থেকে

এই মডিউলের জন্য মডিউল:sa-translit/নথি-এ নথিপত্র তৈরি করা হয়ে থাকতে পারে

-- Table of public functions; `export.tr` is attached to it further down.
local export = {}

-- Shared string helpers (project module; its implementation is not visible here).
local m_str_utils = require("Module:string utilities")

-- Pattern substitution used for every rewrite step in `export.tr`.
-- NOTE(review): presumably a Unicode-aware gsub, since the patterns below
-- contain multi-byte characters inside character classes -- confirm.
local gsub = m_str_utils.gsub
-- Unicode NFC normalisation, applied to the final result.
local toNFC = mw.ustring.toNFC
-- Called below with code-point numbers to obtain the corresponding characters.
local U = m_str_utils.char

-- Combining marks written into the output.
local grave = U(0x300) -- U+0300 combining grave accent (used for svarita)
local acute = U(0x301) -- U+0301 combining acute accent (used for udatta)
local diaeresis = U(0x308) -- U+0308 combining diaeresis (marks a vowel in hiatus)
-- Vedic accent signs looked for in the input.
local svar = U(0x951) -- U+0951, the svarita sign
local anud = U(0x952) -- U+0952, the anudatta sign
local d_svar = U(0x1CDA) -- double svarita, sometimes used for long vowel with svarita

-- Devanagari consonant letters and their Bengali-script counterparts,
-- one articulation class per row.
local consonants = {
	-- velar stops and nasal
	['क'] = 'ক', ['ख'] = 'খ', ['ग'] = 'গ', ['घ'] = 'ঘ', ['ङ'] = 'ঙ',
	-- palatal stops and nasal
	['च'] = 'চ', ['छ'] = 'ছ', ['ज'] = 'জ', ['झ'] = 'ঝ', ['ञ'] = 'ঞ',
	-- retroflex stops and nasal
	['ट'] = 'ট', ['ठ'] = 'ঠ', ['ड'] = 'ড', ['ढ'] = 'ঢ', ['ण'] = 'ণ',
	-- dental stops and nasal
	['त'] = 'ত', ['थ'] = 'থ', ['द'] = 'দ', ['ध'] = 'ধ', ['न'] = 'ন',
	-- labial stops and nasal
	['प'] = 'প', ['फ'] = 'ফ', ['ब'] = 'ব', ['भ'] = 'ভ', ['म'] = 'ম',
	-- semivowels and liquids (ळ shares the Bengali letter used for ल)
	['य'] = 'য়', ['र'] = 'র', ['ल'] = 'ল', ['व'] = 'ৱ', ['ळ'] = 'ল',
	-- sibilants and aspirate
	['श'] = 'শ', ['ष'] = 'ষ', ['स'] = 'স', ['ह'] = 'হ',
}

-- Devanagari dependent vowel signs (matras) and the virama, mapped to the
-- corresponding Bengali signs.
-- The vocalic r/l signs (ृ ॄ ॢ ॣ) previously mapped to Latin IAST letters
-- (ṛ ṝ ḷ ḹ), which put Latin characters into otherwise Bengali output; they
-- now map to the Bengali dependent signs, matching the independent forms
-- (ঋ ৠ ঌ ৡ) in the `tt` table.
local diacritics = {
	['ा']='া', ['ि']='ি', ['ी']='ী', ['ु']='ু', ['ू']='ূ', ['ृ']='ৃ', ['ॄ']='ৄ',
	['ॢ']='ৢ', ['ॣ']='ৣ', ['े']='ে', ['ै']='ৈ', ['ो']='ো', ['ौ']='ৌ', ['्']='্',
}

-- Catch-all table applied character by character at the end of `export.tr`:
-- independent vowels, nasalisation/visarga signs, avagraha, digits and a few
-- special signs.
-- Fixes relative to the previous revision:
--   * 'इ' mapped to 'ইi' (stray Latin "i" appended); it now maps to 'ই'.
--   * the key for the digit three was the Bengali digit '৩' instead of the
--     Devanagari '३', so Devanagari 3 was never transliterated.
-- NOTE(review): ꣳ, ऽ, ᳵ and ᳶ still map to Latin-script symbols and ॐ maps to
-- itself -- confirm whether Bengali-script equivalents are wanted.
local tt = {
	-- vowels
	['अ']='অ', ['आ']='আ', ['इ']='ই', ['ई']='ঈ', ['उ']='উ', ['ऊ']='ঊ', ['ऋ']='ঋ', ['ॠ']='ৠ',
	['ऌ']='ঌ', ['ॡ']='ৡ', ['ए']='এ', ['ऐ']='ঐ', ['ओ']='ও', ['औ']='ঔ',
	-- chandrabindu
	['ँ']='ঁ', --until a better method is found
	-- anusvara
	['ं']='ং', --until a better method is found
	['ꣳ']='ṃ',  -- candrabindu virama
	-- visarga
	['ः']='ঃ',
	-- avagraha
	['ऽ']='ʼ',
	--numerals
	['०']='০', ['१']='১', ['२']='২', ['३']='৩', ['४']='৪', ['५']='৫', ['६']='৬', ['७']='৭', ['८']='৮', ['९']='৯',
	--punctuation (danda and double danda are handled by a dedicated gsub in export.tr)
--	['॥']='.', --double danda
--	['।']='.', --danda
	--Vedic extensions
	['ᳵ']='x', ['ᳶ']='f',
	--Om
	['ॐ']='ॐ',
	--reconstructed
	['*'] = '',
}

--- Transliterate Devanagari-script Sanskrit into Bengali script.
--
-- When the text carries Vedic accent signs (anudatta, svarita or double
-- svarita), the accents are first resolved into combining acute (udatta) and
-- grave (independent svarita) marks; otherwise a simpler consonant-by-consonant
-- pass is used. Both paths then share the final character mapping through `tt`,
-- danda replacement and NFC normalisation.
--
-- Changes relative to the previous revision:
--   * The inherent-vowel placeholder is now one local (`inherent`). Before,
--     '৽' was inserted after bare consonants while the vowel class and the
--     hiatus pattern still looked for Latin "a", so accents could never be
--     placed on a syllable with the inherent vowel.
--   * The non-accented path no longer emits a Latin "a" before an independent
--     vowel, and the hiatus check compares the Devanagari source letter
--     instead of the never-matching `tt[e] == "i"` / `"u"`.
--   * Letters inside the range क-ह that have no entry in `consonants`
--     (ऩ, ऱ, ऴ) are passed through instead of raising a concatenation error.
--
-- NOTE(review): the placeholder '৽' is not removed by any later step in this
-- file, so it is part of the returned string -- confirm that this is the
-- intended rendering of the inherent vowel.
--
-- @param text  string to transliterate
-- @param lang  language code; not used by this function
-- @param sc    script code; only "Deva" is handled
-- @return the transliterated string, or nil when `sc` is not "Deva"
function export.tr(text, lang, sc)
	if sc ~= "Deva" then
		return nil
	end

	-- Marker written after a consonant that has neither a vowel sign nor a virama.
	local inherent = '৽'

	-- Plain (non-pattern) search for any Vedic accent sign.
	local has_accent = text:find(anud, 1, true)
		or text:find(svar, 1, true)
		or text:find(d_svar, 1, true)

	if has_accent then
		-- insert the inherent-vowel marker after consonants without vowel diacritic or virama
		text = gsub(text, '([क-ह])([ा-्ॢॣ]?)',
			function(c, d)
				if d == "" then
					return c .. inherent
				end
				return c .. d
			end)

		local vow_list = inherent .. "अ-औा-ौॠ-ॣ"
		local vow = "[" .. vow_list .. "]"
		local extra_list = "ःंँ" -- visarga, anusvara, candrabindu
		local extra = "[" .. extra_list .. "]"
		local acc_list = grave .. acute .. svar .. anud .. d_svar
		-- Consonants are defined by negating the non-consonants, so that the
		-- munged versions of formatting characters (e.g. bold formatting),
		-- avagraha, virama and spaces all count as "consonant" material.
		local cons = "[^" .. vow_list .. acc_list .. extra_list .. "।॥ॐ]"

		-- Shared replacement callback. `head` ends in the vowel to accent,
		-- `ext` is an optional visarga/anusvara/candrabindu and `nxt` is the
		-- character (possibly empty) that follows. A following svarita sign
		-- is consumed and turns the vowel into an independent svarita;
		-- anything else is kept and the vowel becomes udatta.
		local function accent_vowel(head, ext, nxt)
			if nxt == svar or nxt == d_svar then
				return head .. grave .. ext
			end
			return head .. acute .. ext .. nxt
		end

		-- independent svarita before udatta or other independent svarita
		-- (indicated by १/३ with both svarita and anudatta sign)
		text = gsub(text, "(" .. extra .. "?)" .. anud .. "?[१३][" .. anud .. svar .. d_svar .. "]+(" ..
			cons .. "*" .. vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)",
			function(a, b, c, d)
				if d ~= "" then
					return grave .. a .. b .. grave .. c	-- 2 × independent svarita
				end
				return grave .. a .. b .. acute .. c	-- independent svarita + udatta
			end)

		-- optional: a few non-Rigvedic ways to mark the independent svarita (but compatible with Rigvedic system)
		-- 1) ᳡ (U+1CE1) used by Atharvavedic Śaunakīya Saṃhitā
		-- 2) ᳖ (U+1CD6) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'standard' independent svarita
		-- 3) ᳕ (U+1CD5) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'aggravated' independent svarita (before udatta)
		-- note that the Rigvedic system doesn't distinguish between dependent vs. independent
		-- svarita after udatta (the latter would need manual addition of grave)
		text = gsub(text, "(" .. extra .. "?)[᳡᳖]", grave .. "%1")
		text = gsub(text, "(" .. extra .. "?)᳕(" .. cons .. "*" .. vow ..")", grave .. "%1%2" .. acute)

		-- Tail shared by the "first syllable" patterns: optional extra sign,
		-- then one character that is neither anudatta, grave nor an extra sign.
		local first_syll_tail = "(" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])"
		-- initial udatta/svarita
		text = gsub(text, "^(" .. cons .. "*" .. vow .. ")" .. first_syll_tail, accent_vowel)
		-- the same, after (double) danda or 'om'
		text = gsub(text, "([।॥ॐ]" .. cons .. "*" .. vow .. ")" .. first_syll_tail, accent_vowel)

		-- Vowel carrying an anudatta sign, followed by the next syllable's vowel.
		local after_anud = "(" .. vow .. extra .. "?" .. anud .. cons .. "*" .. vow .. ")(" .. extra .. "?)"
		-- in case of anudatta sign not before other anudatta sign (nor before grave accent from १/३)
		text = gsub(text, after_anud .. "([^" .. anud .. grave .. extra_list .. "])", accent_vowel)
		-- and again (excluding acute on next vowel), in case of overlapping patterns
		-- (if the trailing character above happens to be another vowel with anudatta)
		text = gsub(text, after_anud .. "([^" .. anud .. grave .. acute .. extra_list .. "])", accent_vowel)
		-- the same, string final (the trailing capture is a svarita sign or empty)
		text = gsub(text, after_anud .. "([" .. svar .. d_svar .. "]?)$", accent_vowel)

		-- unmarked vowel after udatta is also udatta; each pass extends a run
		-- by one vowel, and up to 4 udattas in a row occur (RV.1.164.39)
		local after_udatta = "(" .. vow .. acute .. extra .. "?" .. cons .. "*" .. vow .. ")"
		for _ = 1, 3 do
			text = gsub(text, after_udatta .. "(" .. extra .. "?[^" .. acc_list .. extra_list .. "])",
				"%1" .. acute .. "%2")
		end
		-- the same, string final
		text = gsub(text, after_udatta .. "(" .. extra .. "?)$", "%1" .. acute .. "%2")

		-- remove remaining anudatta and svarita signs
		text = gsub(text, "[" .. anud .. svar .. d_svar .. "]", "")

		text = gsub(text, '.', consonants)
		text = gsub(text, '.', diacritics)
	else -- no Vedic accents
		text = gsub(
			text,
			'([क-ह])' ..
			'([ािीुूृॄॢॣेैोौ्]?)' ..
			'([अ-औ]?)',
			function(c, d, e)
				-- Fall back to the source letter when it has no table entry.
				local out = consonants[c] or c
				if d == "" then
					out = out .. inherent
				else
					out = out .. diacritics[d]
				end
				if e ~= "" then
					out = out .. tt[e]
					-- inherent vowel directly followed by independent i/u: mark the hiatus
					if d == "" and (e == 'इ' or e == 'उ') then
						out = out .. diaeresis
					end
				end
				return out
			end)
	end

	-- hiatus: (inherent or independent) a, optionally accented, followed by independent i/u
	text = gsub(text, '([' .. inherent .. 'अ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis)
	text = gsub(text, '.', tt)
	-- danda / double danda (with an optional preceding space) become a full stop
	text = gsub(text, " ?[।॥]", ".")
	text = toNFC(text)

	return text
end
 
-- Hand the table of public functions back to the caller of require().
return export