মডিউল:script utilities

উইকিঅভিধান, মুক্ত অভিধান থেকে

This module provides access to Module:scripts from templates, so that they can make use of the information stored there. It also provides a number of functions that can be used by other modules.

Exported functions[সম্পাদনা]


tag_text(text, lang, sc, face)

Wraps the given text in HTML tags with appropriate CSS classes (see WT:CSS) for the language and script. This is required for all non-English text on Wiktionary.

The actual tags and CSS classes that are added are determined by the face parameter. It can be one of the following:

  • term: The text is wrapped in <i class="(sc) mention" lang="(lang)">...</i>.
  • head: The text is wrapped in <strong class="(sc) headword" lang="(lang)">...</strong>.
  • bold: The text is wrapped in <b class="(sc)" lang="(lang)">...</b>.
  • nil (face omitted): The text is wrapped in <span class="(sc)" lang="(lang)">...</span>.


transliterate(text, lang, sc)

Generates a transliteration (romanization) from the given text into Latin script, using the transliteration rules of the given language. This will use the module specified with the translit_module setting for the language in Module:languages. If no transliteration module is available, or if the transliteration fails in some other way, nil is returned.


request_script(lang, sc)

Generates a request to provide a term in its native script, if it is missing. This is used by the {{rfscript}} template as well as by the functions in Module:links.

The function will add entries to one of the subcategories of Category:Terms needing native script by language, and do several checks on the given language and script. In particular:

  • If the script was given, a subcategory named "needing (script) script" is added, but only if the language has more than one script. Otherwise, the main "needing native script" category is used.
  • Nothing is added at all if the language has no scripts other than Latin and its varieties.


{{#invoke:script utilities|template_rfscript}}

This is used by {{rfscript}}. See there for more information.

See also[সম্পাদনা]

-- Table of functions exported by this module.
local export = {}

--[=[
	Modules used:
	[[Module:script utilities/data]]
	[[Module:scripts]]
	[[Module:languages]]
	[[Module:parameters]]
	[[Module:debug/track]]
	[[Module:senseid]] (only when id's present)
	[[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text)
	[[Module:munge_text]] (only when hyphens or parentheses in Korean text or spaces in vertical text)
	[[Module:utilities/format_categories]] (only when a script request is categorized)
]=]

--- Report whether a script object is a variety of the Latin script.
-- The check is a substring search on the script code, so every code that
-- contains "Lat" counts as Latin.
-- @param sc script object exposing a `getCode()` method
-- @return `true` when the code contains "Lat", otherwise `false`
function export.is_Latin_script(sc)
	-- Latn, Latf, Latinx, pjt-Latn
	-- `and true or false` turns find()'s index/nil result into a real boolean.
	return sc:getCode():find("Lat") and true or false
end

--- Template entry point used by [[Template:lang]].
-- Reads the arguments of the calling template (the parent frame), resolves the
-- language and optional script codes to objects, and delegates to
-- `export.tag_text`.
-- @param frame Scribunto frame object; its parent's args are processed
-- @return the tagged text produced by `export.tag_text`
-- Raises an error when no language code is given outside the Template
-- namespace, or when the `sc` code is not a valid script code.
function export.lang_t(frame)
	local params = {
		[1] = {},
		[2] = { allow_empty = true, default = "" },
		["sc"] = {},
		["face"] = {},
		["class"] = {},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local NAMESPACE = mw.title.getCurrentTitle().nsText

	-- On template pages a missing language code falls back to "und"; anywhere
	-- else it is an error.
	local lang = args[1] or (NAMESPACE == "Template" and "und") or error("Language code has not been specified. Please pass parameter 1 to the template.")
	lang = require("Module:languages").getByCode(lang) or require("Module:languages").err(lang, 1)

	local text = args[2]

	-- A script code is optional, but when supplied it must be valid.
	local sc = args["sc"]
	sc = (sc and (require("Module:scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil)

	local face = args["face"]
	local class = args["class"]

	return export.tag_text(text, lang, sc, face, class)
end

--- Record a tracking hit when `text` matches `pattern`.
-- Ustring turns on the codepoint-aware string matching. The basic string function
-- should be used for simple sequences of characters, Ustring function for
-- sets – [].
-- @param text     string to search
-- @param pattern  Lua pattern; nothing happens when it is nil/false
-- @param tracking tracking key suffix; the hit is recorded as "script/" .. tracking
-- @param ustring  truthy to match with mw.ustring.find instead of string.find
local function trackPattern(text, pattern, tracking, ustring)
	-- `mw` is only dereferenced when codepoint-aware matching was requested.
	local find = ustring and mw.ustring.find or string.find
	if pattern and find(text, pattern) then
		require("Module:debug/track")("script/" .. tracking)
	end
end

--- Record tracking hits for character sequences that are usually mistakes.
-- Only the languages handled in the chain below are inspected; for every
-- other language code the function does nothing.
-- @param text string to inspect (nothing happens when nil)
-- @param lang language object exposing `getCode()` (nothing happens when nil)
-- @param sc   script object; not read by this function
local function track(text, lang, sc)
	local u = mw.ustring.char

	if lang and text then
		local langCode = lang:getCode()

		-- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]]
		if langCode == "ang" then
			-- Decompose first so that a combining acute is found even when the
			-- text uses precomposed accented letters.
			local decomposed = mw.ustring.toNFD(text)
			local acute = u(0x301)

			trackPattern(decomposed, acute, "ang/acute")

		-- ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ.
		elseif langCode == "el" or langCode == "grc" then
			trackPattern(text, "ϑ", "Greek/wrong-theta")
			trackPattern(text, "ϰ", "Greek/wrong-kappa")
			trackPattern(text, "ϱ", "Greek/wrong-rho")
			trackPattern(text, "ϕ", "Greek/wrong-phi")

			--[=[
			[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-coronis]]
			[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-smooth-breathing]]
			[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/wrong-apostrophe]]
				When spacing coronis and spacing smooth breathing are used as apostrophes,
				they should be replaced with right single quotation marks (’).
			]=]
			if langCode == "grc" then
				trackPattern(text, u(0x1FBD), "Ancient Greek/spacing-coronis")
				trackPattern(text, u(0x1FBF), "Ancient Greek/spacing-smooth-breathing")
				trackPattern(text, "[" .. u(0x1FBD) .. u(0x1FBF) .. "]", "Ancient Greek/wrong-apostrophe", true)
			end

		-- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]]
		elseif langCode == "ru" then
			local decomposed = mw.ustring.toNFD(text)

			trackPattern(decomposed, u(0x300), "Russian/grave-accent")

		-- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-punctuation]]
		elseif langCode == "bo" then
			-- Checked both at the very end and just before a closing "]]".
			trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation", true)
			trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation", true)

		elseif langCode == "th" then
			trackPattern(text, "เ".."เ", "Thai/broken-ae")
			trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am", true)
			trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue", true)

		elseif langCode == "lo" then
			trackPattern(text, "ເ".."ເ", "Lao/broken-ae")
			trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am", true)
			trackPattern(text, "ຫນ", "Lao/possible-broken-ho-no")
			trackPattern(text, "ຫມ", "Lao/possible-broken-ho-mo")
			trackPattern(text, "ຫລ", "Lao/possible-broken-ho-lo")

		elseif langCode == "khb" then
			trackPattern(text, "ᦵ".."ᦵ", "Lü/broken-ae")
			trackPattern(text, "[ᦀ-ᦫ][ᦵᦶᦷᦺ]", "Lü/possible-wrong-sequence", true)
		end
	end
end

--- Wrap text in the appropriate HTML tags with language and script class.
-- @param text  string to wrap
-- @param lang  language object (its code becomes the `lang` attribute); may be nil
-- @param sc    script object; when nil it is detected with
--              Module:scripts.findBestScript(text, lang)
-- @param face  key into the `faces` table of Module:script utilities/data;
--              nil selects the entry stored under the string "nil"
-- @param class optional extra CSS class appended after the face's classes
-- @param id    optional sense id; when given, an `id` attribute is generated
--              with Module:senseid
-- @return HTML string: optional prefix, opening tag, text, closing tag and,
--         for some right-to-left text, a trailing "&lrm;"
-- Raises an error when `face` has no entry in the faces table.
function export.tag_text(text, lang, sc, face, class, id)
	if not sc then
		sc = require("Module:scripts").findBestScript(text, lang)
	end

	track(text, lang, sc)

	-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
	if sc:getDirection() == "down" and text:find(" ") then
		text = require("Module:munge_text")(text, function(txt)
			-- having extra parentheses makes sure only the first return value gets through
			return (txt:gsub(" +", "<br>"))
		end)
	end

	-- Hack Korean script text to remove hyphens.
	-- XXX: This should be handled in a more general fashion, but needs to
	-- be efficient by not doing anything if no hyphens are present, and currently this is the only
	-- language needing such processing.
	-- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]].
	if sc:getCode() == "Kore" and (text:find("%-") or text:find("[()]")) then
		text = require("Module:munge_text")(text, function(txt)
			-- Hani/Hang regex is a reasonable subset of Hani/Hang from [[Module:scripts/data]],
			-- last checked on 20220221
			txt = txt:gsub("%-", "")
			txt = mw.ustring.gsub(txt, "([一-鿿㐀-䶿𠀀-𮯯𰀀-𱍏]+)%(([가-힣ᄀ-ᇿꥠ-ꥼힰ-ퟻ]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>")
			return txt
		end)
	end

	-- Text in the "Imag" script is always rendered with the default face.
	if sc:getCode() == "Imag" then
		face = nil
	end

	-- Build the class attribute: script code first, then the face's classes,
	-- then the caller-supplied class (if any).
	local function class_attr(classes)
		table.insert(classes, 1, sc:getCode())
		if class and class ~= '' then
			table.insert(classes, class)
		end
		return 'class="' .. table.concat(classes, ' ') .. '"'
	end

	-- Build the full attribute string: optional id, class, optional lang.
	local function tag_attr(...)
		local output = {}
		if id then
			table.insert(output, 'id="' .. require("Module:senseid").anchor(lang, id) .. '"')
		end

		table.insert(output, class_attr({...}) )

		if lang then
			table.insert(output, 'lang="' .. lang:getCode() .. '"')
		end

		return table.concat(output, " ")
	end

	if face == "hypothetical" then
		-- [[Special:WhatLinksHere/Template:tracking/script-utilities/face/hypothetical]]
		require("Module:debug/track")("script-utilities/face/hypothetical")
	end

	local data = mw.loadData("Module:script utilities/data").faces[face or "nil"]

	-- Right-to-left text that is a translation or ends in punctuation gets a
	-- trailing left-to-right mark.
	local post = ""
	if sc:getDirection() == "rtl" and (face == "translation" or mw.ustring.find(text, "%p$")) then
		post = "&lrm;"
	end

	-- Add a script wrapper
	if data then
		return ( data.prefix or "" ) .. '<' .. data.tag .. ' ' .. tag_attr(data.class) .. '>' .. text .. '</' .. data.tag .. '>' .. post
	else
		-- tostring() keeps the message well-formed even if face is not a string.
		error('Invalid script face "' .. tostring(face) .. '".')
	end
end

--- Wrap a transliteration in its HTML tag.
-- @param translit   transliteration text placed inside the tag
-- @param lang       language code, or a language object exposing `getCode()`
-- @param kind       key into the `translit` table of
--                   Module:script utilities/data; defaults to "default"
-- @param attributes optional extra attribute string appended to the opening tag
-- @param is_manual  truthy to add the "manual-tr" class
-- @return HTML string `<tag ...>translit</tag>`
-- Raises an error when `lang` is a table without a `getCode` method.
function export.tag_translit(translit, lang, kind, attributes, is_manual)
	if type(lang) == "table" then
		lang = lang.getCode and lang:getCode()
			or error("Second argument to tag_translit should be a language code or language object.")
	end

	local data = mw.loadData("Module:script utilities/data").translit[kind or "default"]

	local opening_tag = {}

	table.insert(opening_tag, data.tag)
	if lang == "ja" then
		-- For "ja" neither a lang attribute nor the Latn class is emitted.
		table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr"')
	else
		table.insert(opening_tag, 'lang="' .. lang .. '-Latn"')
		table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"')
	end

	if data.dir then
		table.insert(opening_tag, 'dir="' .. data.dir .. '"')
	end

	table.insert(opening_tag, attributes)

	return "<" .. table.concat(opening_tag, " ") .. ">" .. translit .. "</" .. data.tag .. ">"
end

--- Wrap a transcription in its HTML tag.
-- @param transcription transcription text placed inside the tag
-- @param lang          language code, or a language object exposing `getCode()`
-- @param kind          key into the `transcription` table of
--                      Module:script utilities/data; defaults to "default"
-- @param attributes    optional extra attribute string appended to the opening tag
-- @return HTML string `<tag ...>transcription</tag>`
-- Raises an error when `lang` is a table without a `getCode` method.
function export.tag_transcription(transcription, lang, kind, attributes)
	if type(lang) == "table" then
		-- `lang` is the second parameter of this function, so the message
		-- names this function and that position.
		lang = lang.getCode and lang:getCode()
			or error("Second argument to tag_transcription should be a language code or language object.")
	end

	local data = mw.loadData("Module:script utilities/data").transcription[kind or "default"]

	local opening_tag = {}

	table.insert(opening_tag, data.tag)
	if lang == "ja" then
		-- For "ja" neither a lang attribute nor the Latn class is emitted.
		table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts"')
	else
		table.insert(opening_tag, 'lang="' .. lang .. '-Latn"')
		table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts Latn"')
	end

	if data.dir then
		table.insert(opening_tag, 'dir="' .. data.dir .. '"')
	end

	table.insert(opening_tag, attributes)

	return "<" .. table.concat(opening_tag, " ") .. ">" .. transcription .. "</" .. data.tag .. ">"
end

--- Add a notice to request the native script of a word.
-- @param lang     language object (must provide `getScripts`, `getCode`,
--                 `getCanonicalName`)
-- @param sc       script object, or nil when the script is not known
-- @param usex     truthy to use the "usage examples" request category instead
--                 of the "terms" one
-- @param nocat    truthy to suppress the category
-- @param sort_key sort key passed through to the category formatter
-- @return "" when no request is needed (the script is Latin, or the language
--         has no non-Latin script); otherwise the notice HTML followed by the
--         formatted category
-- Raises an error when `lang` has no `getScripts` method.
function export.request_script(lang, sc, usex, nocat, sort_key)
	local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.')

	-- By default, request for "native" script
	local cat_script = "native"
	local disp_script = "script"

	-- If the script was not specified, and the language has only one script, use that.
	if not sc and #scripts == 1 then
		sc = scripts[1]
	end

	-- Is the script known?
	if sc then
		-- If the script is Latin, return nothing.
		if export.is_Latin_script(sc) then
			return ""
		end

		-- Name the script in the notice only when it is not the language's
		-- first-listed script.
		if sc:getCode() ~= scripts[1]:getCode() then
			disp_script = sc:getCanonicalName()
		end

		-- The category needs to be specific to script only if there is chance
		-- of ambiguity. This occurs when lang=und, or when the language has
		-- multiple scripts.
		if lang:getCode() == "und" or scripts[2] then
			cat_script = sc:getCanonicalName()
		end
	else
		-- The script is not known.
		-- Does the language have at least one non-Latin script in its list?
		local has_nonlatin = false

		for _, val in ipairs(scripts) do
			if not export.is_Latin_script(val) then
				has_nonlatin = true
				break
			end
		end

		-- If there are no non-Latin scripts, return nothing.
		if not has_nonlatin then
			return ""
		end
	end

	local category

	if usex then
		category = "Requests for " .. cat_script .. " script in " .. lang:getCanonicalName() .. " usage examples"
	else
		category = "Requests for " .. cat_script .. " script for " .. lang:getCanonicalName() .. " terms"
	end

	return "<small>[" .. disp_script .. " needed]</small>" ..
		(nocat and "" or require("Module:utilities/format_categories")({category}, lang, sort_key))
end

--- Template entry point used by {{rfscript}}.
-- Processes the calling template's arguments, resolves the language and
-- optional script, and returns the request notice from `export.request_script`.
-- @param frame Scribunto frame object; its parent's args are processed
-- @return the notice (and category) string
-- Raises an error when `export.request_script` returns "", i.e. when no
-- native-script request applies.
function export.template_rfscript(frame)
	local params = {
		[1] = { required = true, default = "und" },
		["sc"] = {},
		["usex"] = { type = "boolean" },
		["nocat"] = { type = "boolean" },
		["sort"] = {},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local lang = require("Module:languages").getByCode(args[1], 1)
	local sc = args.sc and require("Module:scripts").getByCode(args.sc, true)

	local ret = export.request_script(lang, sc, args.usex, args.nocat, args.sort)

	-- An empty result means the request was judged unnecessary; surface that
	-- to the editor as an error rather than rendering nothing.
	if ret == "" then
		error("This language is written in the Latin alphabet. It does not need a native script.")
	end

	return ret
end

--- Verify that every letter of `text` belongs to the given script.
-- Returns nothing on success.
-- @param text       string to check
-- @param scriptCode script code looked up with Module:scripts.getByCode
-- @param result     optional custom error message; used verbatim when it is a
--                   string, otherwise a default message is generated
-- Raises an error when the script code is not recognized, or when `text`
-- contains letters outside the script's character set.
function export.checkScript(text, scriptCode, result)
	local scriptObject = require("Module:scripts").getByCode(scriptCode)

	if not scriptObject then
		error('The script code "' .. scriptCode .. '" is not recognized.')
	end

	local originalText = text

	-- Remove non-letter characters.
	text = mw.ustring.gsub(text, "[%A]", "")

	-- Remove all characters of the script in question.
	text = mw.ustring.gsub(text, "[" .. scriptObject:getCharacters() .. "]", "")

	-- Whatever is left is a letter from some other script.
	if text ~= "" then
		if type(result) == "string" then
			error(result)
		else
			-- Level 2 attributes the error to the caller of checkScript.
			error('The text "' .. originalText .. '" contains the letters "' .. text .. '" that do not belong to the ' .. scriptObject:getDisplayForm() .. '.', 2)
		end
	end
end

return export