بۆ ناوەڕۆک بازبدە

مۆدیوول:form of/lang-data/en/functions

لە ویکیفەرھەنگ

"بەڵگەدارکردنی ئەم مۆدیوولە دەکرێ لە مۆدیوول:form of/lang-data/en/functions/docدا دروست بکرێ"

--[=[
This module contains lang-specific functions for English.

TODO:
* Handle alternative forms in umlaut more elegantly.
* Enable multiple categories (e.g. umlaut and plural in -n).
* Handle doubled consonants with Germanic plurals (e.g. "Yid" to "Yidden").
* Handle trivial consonant changes in umlaut plurals (e.g. "cow" to "kine").
]=]

local en_utilities_module = "Module:en-utilities"
local headword_data_module = "Module:headword/data"
local links_module = "Module:links"
local load_module = "Module:load"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local utilities_module = "Module:utilities"

local ipairs = ipairs
local require = require
local toNFD = mw.ustring.toNFD
local type = type

--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
	-- Lazy loader: the first call fetches `get_fragment` from the links module,
	-- rebinds this local to it, and forwards the arguments; later calls go
	-- straight to the loaded function.
	local function get_fragment(...)
		local loaded = require(links_module).get_fragment
		get_fragment = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `get_plaintext` from the utilities
	-- module, rebinds this local to it, and forwards the arguments.
	local function get_plaintext(...)
		local loaded = require(utilities_module).get_plaintext
		get_plaintext = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `insertIfNot` from the table module,
	-- rebinds this local to it, and forwards the arguments.
	local function insert_if_not(...)
		local loaded = require(table_module).insertIfNot
		insert_if_not = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `is_regular_plural` from the
	-- en-utilities module, rebinds this local to it, and forwards the arguments.
	local function is_regular_plural(...)
		local loaded = require(en_utilities_module).is_regular_plural
		is_regular_plural = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `load_data` from the load module,
	-- rebinds this local to it, and forwards the arguments.
	local function load_data(...)
		local loaded = require(load_module).load_data
		load_data = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `pattern_escape` from the string
	-- utilities module, rebinds this local to it, and forwards the arguments.
	local function pattern_escape(...)
		local loaded = require(string_utilities_module).pattern_escape
		pattern_escape = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `remove_possessive` from the
	-- en-utilities module, rebinds this local to it, and forwards the arguments.
	local function remove_possessive(...)
		local loaded = require(en_utilities_module).remove_possessive
		remove_possessive = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `split` from the string utilities
	-- module, rebinds this local to it, and forwards the arguments.
	local function split(...)
		local loaded = require(string_utilities_module).split
		split = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `char` from the string utilities
	-- module, rebinds the local `u` to it, and forwards the arguments.
	local function u(...)
		local loaded = require(string_utilities_module).char
		u = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `gsub` from the string utilities
	-- module, rebinds the local `ugsub` to it, and forwards the arguments.
	local function ugsub(...)
		local loaded = require(string_utilities_module).gsub
		ugsub = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `lower` from the string utilities
	-- module, rebinds the local `ulower` to it, and forwards the arguments.
	local function ulower(...)
		local loaded = require(string_utilities_module).lower
		ulower = loaded
		return loaded(...)
	end

	-- Lazy loader: the first call fetches `match` from the string utilities
	-- module, rebinds the local `umatch` to it, and forwards the arguments.
	local function umatch(...)
		local loaded = require(string_utilities_module).match
		umatch = loaded
		return loaded(...)
	end

--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
	-- Cached pattern fragment matching zero or more combining diacritics; nil
	-- until first needed. Access it as `diacritics or get_diacritics()`.
	local diacritics
	-- One-shot loader: builds the fragment from the `diacritics_all` character
	-- class in the headword data with "*" appended, stores it in `diacritics`,
	-- then clears itself so that it cannot be called a second time.
	local function get_diacritics()
		diacritics = load_data(headword_data_module).page.comb_chars.diacritics_all .. "*"
		get_diacritics = nil
		return diacritics
	end
	
	-- Cached `vowels` value from the en-utilities module; nil until first needed.
	-- Access it as `vowels or get_vowels()`. Callers in this file splice it
	-- between "[" and "]" to form a character class.
	local vowels
	-- One-shot loader: stores the value in `vowels`, then clears itself so
	-- that it cannot be called a second time.
	local function get_vowels()
		vowels = require(en_utilities_module).vowels
		get_vowels = nil
		return vowels
	end

----------------------- Category functions -----------------------

-- Return `term` with its final consonant doubled when that consonant follows a single vowel (a pattern that is common
-- with Germanic suffixes); otherwise return `term` unchanged.
--
-- The final letter must be one of the lowercase ASCII consonants listed in the pattern below. The text before it is
-- NFD-normalized and lowercased, and must end in one vowel (optionally carrying combining diacritics). The consonant is
-- doubled only if what precedes that vowel is empty, is exactly "y", ends in a vowel followed by "y" (a consonantal
-- "y"), or ends in a character outside the vowel set.
-- NOTE(review): the special handling of "y" only matters if the vowel set from en-utilities contains "y" -- confirm.
local function double_final_consonant(term)
	-- Include "h" and "x" just in case.
	local head, last = term:match("^(.*)([bcdfghjklmnpqrstvxz])$")
	if head then
		diacritics = diacritics or get_diacritics()
		local normalized = ulower(toNFD(head))
		local vowel_class = "[" .. (vowels or get_vowels()) .. "]"
		-- The leading `diacritics` lets the lazy capture leave behind any combining marks that belong to the
		-- character before the final vowel.
		local before_vowel = umatch(normalized, "^(.-)" .. diacritics .. vowel_class .. diacritics .. "$")
		if before_vowel and (
			before_vowel == "" or
			before_vowel == "y" or
			umatch(before_vowel, vowel_class .. diacritics .. "y$") or
			umatch(before_vowel, "[^" .. vowels .. "]$")
		) then
			return term .. last
		end
	end
	return term
end

-- `matches_lemma` callback shared by the Germanic "-n" and "-r" plural categories. `stem` is the pagename with the
-- suffix removed. Returns a boolean saying whether `lemma` can produce `stem` in any of these ways:
--   * unchanged (e.g. "feathern") or with an epenthetic "e" (e.g. "foxen");
--   * final "-er" reduced to "-re" (e.g. "sistren");
--   * final consonant doubled per double_final_consonant(), plus "e".
-- `pagename` is accepted for signature compatibility but is not used.
local function check_germanic_suffix(pagename, lemma, stem)
	if stem == lemma or stem == lemma .. "e" then
		return true
	elseif lemma:sub(-2) == "er" and stem == lemma:sub(1, -3) .. "re" then
		return true
	end
	return stem == double_final_consonant(lemma) .. "e"
end

-- List of umlaut plurals. Each entry is of the form {SINGULAR, PLURAL}, where any lemma ending in SINGULAR whose plural
-- ends in PLURAL is counted (hence [[dormouse]] plural [[dormice]] is counted). The entries are Lua patterns: both
-- halves are spliced unescaped into the patterns built by the "plurals with umlaut" entry of `irregular_plurals`, which
-- tries the entries in order, requires SINGULAR to start at a non-vowel/vowel boundary in the lemma, and also retries
-- with "c" replaced by "k" in the stem (for "cow" to "kyne").
local umlaut_plurals = {
	-- [[mouse]] -> [[mice]], [[louse]] -> [[lice]], jocular [[house]] -> [[hice]], [[spouse]] -> [[spice]]
	{"ouse", "ice"},
	-- [[goose]] -> [[geese]], [[swoose]] -> [[sweese]], jocular [[moose]] -> [[meese]]
	{"oose", "eese"},
	-- The "-an" -> "-en" alternation. Plurals in -men/-women with singular in -man/-woman have their own dedicated
	-- entries in `irregular_plurals`.
	{"an", "en"},
	-- NOTE(review): presumably archaic spellings of the above (e.g. "mann" -> "menn", "manne" -> "menne") -- confirm.
	{"ann", "enn"},
	{"anne", "enne"},
	-- [[foot]] -> [[feet]]
	{"oot", "eet"},
	-- NOTE(review): presumably the archaic spelling "foote" -> "feete" -- confirm.
	{"oote", "eete"},
	-- [[tooth]] -> [[teeth]]
	{"ooth", "eeth"},
	-- NOTE(review): presumably nonstandard forms on the model of "hoof" -> "heef" -- confirm.
	{"oof", "eef"},
	-- [[brother]] -> [[brethren]], plus the variant plural spellings in "-ethern" and "-etheren"
	{"other", "ethren"},
	{"other", "ethern"},
	{"other", "etheren"},
	-- "cow"-type plurals ending in "i" or "y" with an optional "e"; the stem "c" is matched as "k" by the caller.
	{"ow", "[iy]e?"},
	-- "cow" to "kine"/"kyne"
	{"ow", "[iy]ne"},
}

--[=[
The key `cat` must be specified and is the name of the category following the language name. Suffixes enclosed in
double angle brackets, e.g. <<-ata>>, are italicized (as if written e.g. {{m|en||-ata}}) in the displayed title, but
not in the category name itself. The description of the category comes from the `description` field; if omitted, it is
constructed from the category by adding "English irregular" to the beginning and appending the value of `desc_suffix`
(if given) to the end. Suffixes enclosed in double angle brackets are italicized, as described above, and template
calls are permitted.

The key `matches_plural` must be specified and is either a string or a function. If a string, the string is a Lua
pattern that should match the end of the pagename, and the remainder becomes the stem passed to `matches_lemma` (see
below). If a function, it should accept two arguments, the pagename and the lemma (or more precisely, the words in the
pagename and lemma that differ, if there are multiple words), and should return the stem of the pagename (minus the
ending) if the pagename matches the ending, otherwise nil.

The key `matches_lemma` must be specified and is either a string or a function. If a string, the string is a Lua
pattern that should match the lemma. If a function, it should accept three arguments, the pagename and lemma as in
`matches_plural`, and the stem returned by `matches_plural` or extracted from the pagename and ending. It should return
a boolean indicating whether the lemma matches.

The key `additional`, if given, is additional text to include in the category description as displayed on the page
itself, but not in the summary of the category as displayed on other pages. For further information, see the
`additional` field in [[Module:category tree/poscatboiler/data/documentation]].

The key `breadcrumb`, if given, is the breadcrumb text. See [[Module:category tree/poscatboiler/data/documentation]].
If omitted, the breadcrumb is constructed from the category name by removing "plurals in" from the beginning of the
category name.

The key `sort_key`, if given, specifies the sort key for the category in its parent category
[[:Category:English irregular plurals]]. By default it is derived from the breadcrumb by removing an initial hyphen.

If a plural doesn't match any of the entries, it goes into [[:Category:English miscellaneous irregular plurals]]. Note
that before checking these entries, plurals that are the same as the singular are excluded (i.e. not considered
irregular), as are plurals formed from the singular by adding [[-s]], [[-es]], [[-'s]] or [[-ses]] (if the singular ends
in '-s'; cf. [[bus]] -> 'busses', [[dis]] -> 'disses'), or by replacing final [[-y]] with [[-ies]].
]=]
local irregular_plurals = {
	{ -- siphon off "women" plurals 'English plurals in -men with singular in -man'
		cat = "plurals in <<-women>> with singular in <<-woman>>",
		additional = "Plurals formed by replacing a final <<-man>> with a final <<-men>> are found in [[:Category:English plurals in -men with singular in -man|Category:English plurals in ''-men'' with singular in ''-man'']].",
		matches_plural = "[Ww]omen",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. pagename:sub(-5, -3) .. "an"
		end,
	},
	{ -- siphon off most of the "umlaut" plurals that will otherwise end up in 'English miscellaneous irregular plurals'
		cat = "plurals in <<-men>> with singular in <<-man>>",
		additional = "Plurals formed by replacing a final <<-woman>> with a final <<-women>> are found in [[:Category:English plurals in -women with singular in -woman|Category:English plurals in ''-women'' with singular in ''-woman'']].",
		matches_plural = "[Mm]en",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. pagename:sub(-3, -3) .. "an"
		end,
	},
	{
		cat = "plurals with umlaut",
		description = "{{{langname}}} irregular noun plurals that are formed via [[umlaut]], i.e. by changing the root vowel rather than adding a suffix.",
		additional = [==[See also:
* Plurals formed by replacing a final <<-man>> with <<-men>> are found in [[:Category:English plurals in -men with singular in -man|Category:English plurals in ''-men'' with singular in ''-man'']]
* Plurals formed by replacing a final <<-woman>> with a final <<-women>> are found in [[:Category:English plurals in -women with singular in -woman|Category:English plurals in ''-women'' with singular in ''-woman'']]]==],
		sort_key = "umlaut",
		matches_plural = function(pagename, lemma)
			for _, umlaut_plural in ipairs(umlaut_plurals) do
				local stem = umatch(lemma, "^(.-)%f[" .. (vowels or get_vowels()) .. "]" .. umlaut_plural[1] .. "$")
				if stem and (
					umatch(pagename, "^" .. pattern_escape(stem) .. umlaut_plural[2] .. "$") or
					-- FIXME: this is really hacky ("cow" to "kyne").
					umatch(pagename, "^" .. pattern_escape(stem):gsub("c", "k") .. umlaut_plural[2] .. "$")
				) then
					return stem
				end
			end
		end,
		matches_lemma = function(pagename, lemma, stem)
			-- All the work already done in matches_plural().
			return true
		end,
	},
	{
		cat = "plurals in <<-tia>> with singular in <<-s>>",
		desc_suffix = ", mostly originating from Latin participles",
		matches_plural = "tia",
		matches_lemma = "s",
	},
	{
		cat = "plurals in <<-ia>> with singular in <<-e>>",
		desc_suffix = ", mostly originating from Latin neuter nouns",
		matches_plural = "ia",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "e"
		end,
	},
	{
		cat = "plurals in <<-ia>> with singular in <<-i>> or <<-y>>",
		desc_suffix = ", mostly originating from Greek neuter nouns",
		matches_plural = "ia",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "i" or lemma == stem .. "y"
		end,
	},
	{
		cat = "plurals in <<-ina>> with singular in <<-en>>",
		desc_suffix = ", mostly originating from Latin neuter nouns",
		additional = [==[
* Plurals formed by replacing a final <<-inum>> with a final <<-ina>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-inon>> with a final <<-ina>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].]==],
		matches_plural = "ina",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "en"
		end,
	},
	{
		cat = "plurals in <<-ra>> with singular in <<-s>>",
		desc_suffix = ", mostly originating from Latin neuter nouns",
		additional = [==[Sometimes the preceding vowel changes; e.g. <<-us>> commonly changes to <<-era>> or <<-ora>> in the plural.
* Plurals formed by replacing a final <<-rum>> with a final <<-ra>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-ron>> with a final <<-ra>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].]==],
		matches_plural = "ra",
		matches_lemma = function(pagename, lemma, stem)
			return lemma:find("s$")
		end,
	},
	{
		cat = "plurals in <<-ata>> with singular in <<-a>> or <<-e>>",
		desc_suffix = ", mostly originating from Ancient Greek neuter nouns in {{m|grc|-μᾰ}}",
		matches_plural = "ata",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "a" or lemma == stem .. "e"
		end,
	},
	{
		cat = "plurals in <<-ata>> with singular in <<-as>>",
		desc_suffix = ", mostly originating from Ancient Greek neuter nouns",
		matches_plural = "ata",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "as"
		end,
	},
	{
		cat = "plurals in <<-au>>",
		desc_suffix = ", mostly originating from Welsh",
		matches_plural = "au",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem
		end,
	},
	{
		cat = "plurals in <<-a>> with singular in <<-an>>",
		desc_suffix = ", mostly originating from Latin or Greek neuter nouns",
		additional = [==[See also:
* Plurals formed by replacing a final <<-um>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-on>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].]==],
		matches_plural = "a",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "an"
		end,
	},
	{
		cat = "plurals in <<-a>> with singular in <<-on>>",
		desc_suffix = ", mostly originating from Greek or Latin nouns",
		additional = [==[See also:
* Plurals formed by replacing a final <<-um>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-an>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -an|Category:English plurals in ''-a'' with singular in ''-an'']].]==],
		matches_plural = "a",
		matches_lemma = function(pagename, lemma, stem)
			return umatch(toNFD(lemma), "^" .. pattern_escape(toNFD(stem)) .. "o" .. (diacritics or get_diacritics()) .. "n$")
		end
	},
	{
		cat = "plurals in <<-a>> with singular in <<-um>>",
		desc_suffix = ", mostly originating from Latin nouns",
		additional = [==[See also:
* Plurals formed by replacing a final <<-on>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].
* Plurals formed by replacing a final <<-an>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -an|Category:English plurals in ''-a'' with singular in ''-an'']].]==],
		matches_plural = "a",
		matches_lemma = function(pagename, lemma, stem)
			return umatch(toNFD(lemma), "^" .. pattern_escape(toNFD(stem)) .. "u" .. (diacritics or get_diacritics()) .. "m$")
		end
	},
	{
		cat = "plurals in <<-a>>",
		desc_suffix = ", mostly originating from Greek or Latin nouns",
		additional = [==[See also:
* Plurals formed by replacing a final <<-um>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-on>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].
* Plurals formed by replacing a final <<-an>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -an|Category:English plurals in ''-a'' with singular in ''-an'']].]==],
		matches_plural = "a",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem
		end,
	},
	{
		cat = "plurals in <<-ae>> with singular in <<-a>>",
		desc_suffix = ", mostly originating from Latin feminine nouns",
		additional = "The <<-ae>> can also be written as a ligature <<-æ>>.",
		matches_plural = function(pagename, lemma)
			return pagename:match("^(.*)ae$") or pagename:match("^(.*)æ$")
		end,
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "a"
		end,
	},
	{
		cat = "plurals in <<-ae>> with singular in <<-e>>",
		desc_suffix = ", mostly originating from Greek feminine nouns",
		additional = "The <<-ae>> can also be written as a ligature <<-æ>>.",
		matches_plural = function(pagename, lemma)
			return pagename:match("^(.*)ae$") or pagename:match("^(.*)æ$")
		end,
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "e"
		end,
	},
	{
		cat = "plurals in <<-ae>>",
		desc_suffix = ", mostly originating from Latin and Greek feminine nouns",
		additional = [==[The <<-ae>> can also be written as a ligature <<-æ>>. See also:
* [[:Category:English plurals in -ae with singular in -a|Category:English plurals in ''-ae'' with singular in ''-a'']]
* [[:Category:English plurals in -ae with singular in -e|Category:English plurals in ''-ae'' with singular in ''-e'']]]==],
		matches_plural = function(pagename, lemma)
			return pagename:match("^(.*)ae$") or pagename:match("^(.*)æ$")
		end,
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem
		end,
	},
	{ -- siphon off most of the "-people" plurals that will otherwise end up in 'English miscellaneous irregular plurals'
		cat = "plurals in <<-people>> with singular in <<-person>>",
		matches_plural = "people",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "person"
		end,
	},
	{
		cat = "plurals in <<-e>> with singular in <<-a>> or <<-ia>>",
		desc_suffix = ", mostly originating from Italian feminine nouns",
		matches_plural = "e",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "a" or
				lemma == stem .. "ia" or
				(stem:match("[cg]h$") and lemma == stem:sub(1, -2) .. "a")
		end,
	},
	{
		cat = "plurals in <<-e>>",
		desc_suffix = ", mostly originating from German masculine or neuter nouns",
		additional = "These are formed by adding <<-e>>. See also [[:Category:English plurals in -e with singular in -a or -ia|Category:English plurals in ''-e'' with singular in ''-a'' or ''-ia'']].",
		matches_plural = "e",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem or double_final_consonant(lemma) == stem
		end,
	},
	{
		cat = "plurals in <<-oth>>",
		desc_suffix = ", mostly originating from Hebrew feminine nouns",
		matches_plural = "oth",
		matches_lemma = function(pagename, lemma, stem)
			return true
		end,
	},
	{
		cat = "plurals in <<-ai>> with singular in <<-a>> or <<-e>>",
		desc_suffix = ", mostly originating from Greek feminine nouns",
		matches_plural = "ai",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "a" or lemma == stem .. "e"
		end,
	},
	{
		cat = "plurals in <<-ai>> with singular in <<-es>> or <<-is>>",
		desc_suffix = ", mostly originating from Greek masculine nouns",
		matches_plural = "ai",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "es" or lemma == stem .. "is"
		end,
	},
	{
		cat = "plurals in <<-oi>> with singular in <<-os>>",
		desc_suffix = ", mostly originating from Greek masculine nouns",
		matches_plural = "oi",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "os"
		end,
	},
	{
		cat = "plurals in <<-i>> with singular in <<-os>>",
		desc_suffix = ", mostly originating from Greek or Latin nouns",
		additional = "Plurals formed by replacing a final <<-us>> with a final <<-i>> are found in [[:Category:English plurals in -i with singular in -us|Category:English plurals in ''-i'' with singular in ''-us'']].",
		matches_plural = "i?i",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "os" or (lemma == stem .. "ios" and stem .. "i" ~= pagename)
		end,
	},
	{
		cat = "plurals in <<-i>> with singular in <<-us>>",
		desc_suffix = ", mostly originating from Latin nouns",
		additional = "Plurals formed by replacing a final <<-os>> with a final <<-i>> are found in [[:Category:English plurals in -i with singular in -os|Category:English plurals in ''-i'' with singular in ''-os'']].",
		matches_plural = "i?i",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "us" or (lemma == stem .. "ius" and stem .. "i" ~= pagename)
		end,
	},
	{
		cat = "plurals in <<-i>> with singular in <<-a>> or <<-ia>>",
		desc_suffix = ", mostly originating from Russian or Ukrainian masculine and feminine nouns or Italian masculine nouns",
		matches_plural = "i",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "a" or
				lemma == stem .. "ia" or
				(stem:match("[cg]h$") and lemma == stem:sub(1, -2) .. "a")
		end,
	},
	{
		cat = "plurals in <<-i>> with singular in <<-e>>",
		desc_suffix = ", mostly originating from Italian masculine nouns",
		matches_plural = "i",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "e"
		end,
	},
	{
		cat = "plurals in <<-i>> with singular in <<-o>> or <<-io>>",
		desc_suffix = ", mostly originating from Italian masculine nouns",
		matches_plural = "i",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "o" or
				lemma == stem .. "io" or
				(stem:match("[cg]h$") and lemma == stem:sub(1, -2) .. "o")
		end,
	},
	{
		cat = "plurals in <<-i>>",
		additional = [==[These are formed by adding <<i>>, or changing <<-a>> to <<-i>>. See also:
* [[:Category:English plurals in -i with singular in -us|Category:English plurals in ''-i'' with singular in ''-us'']]
* [[:Category:English plurals in -i with singular in -os|Category:English plurals in ''-i'' with singular in ''-os'']]
* [[:Category:English plurals in -i with singular in -o or -io|Category:English plurals in ''-i'' with singular in ''-o'' or ''-io'']]
* [[:Category:English plurals in -i with singular in -e|Category:English plurals in ''-i'' with singular in ''-e'']]]==],
		matches_plural = "i",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem
		end,
	},
	{ -- siphon off most of the "-sful" plurals that will otherwise end up in 'English miscellaneous irregular plurals'
		cat = "plurals in <<-sful>> with singular in <<-ful>>",
		additional = "This includes examples such as {{m|en|teaspoonful}}, plural {{m|en|teaspoonsful}}. Generally "
			.. "these refer to specific measures. Note that not all nouns in <<-ful>> pluralize this way; e.g. the "
			.. "plural of {{m|en|handful}} is normally {{m|en|handfuls}} (but {{m|en|handsful}} is possible, if rare).",
		matches_plural = "sful",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "ful" or
				-- [[boxesful]], [[brushesful]], [[busesful]], [[classesful]], [[dishesful]], [[glassesful]], etc.
				stem:find("e$") and lemma == stem:gsub("e$", "") .. "ful" or
				-- [[bakeriesful]], [[belliesful]], [[galleriesful]], [[librariesful]], [[pantriesful]], etc.
				stem:find("ie$") and lemma == stem:gsub("ie$", "y") .. "ful"
		end,
	},
	{
		cat = "plurals in <<-im>>",
		desc_suffix = ", mostly originating from Hebrew masculine nouns",
		additional = "Generally these are formed by simply adding <<-im>>, or <<-m>> if the singular ends in <<-i>> ({{m|en|illui}} – {{m|en|illuim}}; but cf. {{m|en|goiim}}). Some changes that may occur are <<-e->> to <<-a->> or vice versa ({{m|en|heder}} – {{m|en|hadarim}}; {{m|en|gaon}} – {{m|en|geonim}}), <<-f>> to <<-v->> ({{m|en|ganef}} – {{m|en|ganevim}}), and <<-s>> to <<-t->> ({{m|en|balabos}} – {{m|en|balabatim}}).",
		matches_plural = "im",
		matches_lemma = function(pagename, lemma, stem)
			return true
		end,
	},
	{
		cat = "plurals in <<-children>> with singular in <<-child>>",
		matches_plural = "[Cc]hildren",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. pagename:sub(-8, -4)
		end,
	},
	{
		cat = "plurals in <<-in>>",
		desc_suffix = ", mostly originating from Hebrew and Arabic masculine nouns",
		matches_plural = "in",
		matches_lemma = function(pagename, lemma, stem)
			return true
		end,
	},
	{
		cat = "plurals in <<-n>>",
		desc_suffix = ", mostly originating from Germanic nouns",
		additional = [==[See also:
* Plurals formed by replacing a final <<-man>> with <<-men>> are found in [[:Category:English plurals in -men with singular in -man|Category:English plurals in ''-men'' with singular in ''-man'']]
* Plurals formed by replacing a final <<-woman>> with a final <<-women>> are found in [[:Category:English plurals in -women with singular in -woman|Category:English plurals in ''-women'' with singular in ''-woman'']]
* Plurals formed by replacing a final <<-child>> with <<-children>> are found in [[:Category:English plurals in -children with singular in -child|Category:English plurals in ''-children'' with singular in ''-child'']]]==],
		matches_plural = "n",
		matches_lemma = check_germanic_suffix,
	},
	{
		cat = "plurals in <<-ar>>",
		desc_suffix = ", mostly originating from North Germanic nouns",
		matches_plural = "ar",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem
		end,
	},
	{
		cat = "plurals in <<-ir>>",
		desc_suffix = ", mostly originating from North Germanic nouns",
		matches_plural = "ir",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem
		end,
	},
	{
		cat = "plurals in <<-or>> with singular in <<-a>>",
		desc_suffix = ", mostly originating from Swedish nouns",
		matches_plural = "or",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "a"
		end,
	},
	{
		cat = "plurals in <<-r>>",
		desc_suffix = ", mostly originating from Germanic nouns",
		matches_plural = "r",
		matches_lemma = check_germanic_suffix,
	},
	{
		cat = "plurals in <<-bes>> with singular in <<-bs>>",
		desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns",
		matches_plural = "bes",
		matches_lemma = "bs",
	},
	{
		cat = "plurals in <<-ces>> with singular in <<-x>>",
		desc_suffix = ", mostly originating from Latin masculine or feminine nouns",
		additional = "Generally these are formed by changing a final <<-x>> into <<-ces>> or a final <<-ex>> into <<-ices>>.",
		matches_plural = "ces",
		matches_lemma = "x",
	},
	{
		cat = "plurals in <<-des>> with singular in <<-d>>",
		desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns, or Spanish feminine nouns",
		matches_plural = "des",
		matches_lemma = "d",
	},
	{
		cat = "plurals in <<-des>> with singular in <<-s>>",
		desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns",
		matches_plural = "des",
		matches_lemma = "s",
	},
	{
		cat = "plurals in <<-ges>> with singular in <<-x>>",
		desc_suffix = ", mostly originating from Greek masculine or feminine nouns",
		matches_plural = "ges",
		matches_lemma = "x",
	},
	{
		cat = "plurals in <<-ies>> with singular in <<-ey>>",
		matches_plural = "ies",
		matches_lemma = "ey",
	},
	{
		cat = "plurals in <<-ies>> with singular in <<-i>>",
		matches_plural = "ies",
		matches_lemma = "i",
	},
	{
		cat = "plurals in <<-kes>> with singular in <<-x>>",
		desc_suffix = ", mostly originating from Greek masculine or feminine nouns",
		matches_plural = "kes",
		matches_lemma = "x",
	},
	{
		cat = "plurals in <<-ines>> with singular in <<-o>>",
		desc_suffix = ", mostly originating from Latin masculine or feminine nouns",
		matches_plural = "ines",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "o"
		end,
	},
	{
		cat = "plurals in <<-ones>> with singular in <<-o>>",
		desc_suffix = ", mostly originating from Latin masculine or feminine nouns",
		matches_plural = "ones",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "o"
		end,
	},
	{
		-- cf. [[eikon]] pl. [[eikones]]
		cat = "plurals in <<-ones>> with singular in <<-on>>",
		desc_suffix = ", mostly originating from Greek or Spanish masculine nouns",
		matches_plural = "ones",
		matches_lemma = function(pagename, lemma, stem)
			return umatch(toNFD(lemma), "^" .. pattern_escape(toNFD(stem)) .. "o" .. (diacritics or get_diacritics()) .. "n$")
		end
	},
	{
		cat = "plurals in <<-oes>> with singular in <<-o>>",
		matches_plural = "oes",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "o"
		end,
	},
	{
		-- cf. [[levator]] pl. [[levatores]]
		cat = "plurals in <<-ores>> with singular in <<-or>>",
		desc_suffix = ", mostly originating from Latin masculine nouns",
		matches_plural = "ores",
		matches_lemma = function(pagename, lemma, stem)
			return umatch(toNFD(lemma), "^" .. pattern_escape(toNFD(stem)) .. "o" .. (diacritics or get_diacritics()) .. "r$")
		end,
	},
	{
		cat = "plurals in <<-pes>> with singular in <<-ps>>",
		desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns",
		matches_plural = "pes",
		matches_lemma = "ps",
	},
	{
		cat = "plurals in <<-res>> with singular in <<-r>>",
		desc_suffix = ", mostly originating from Greek, Latin, Portuguese, or Spanish masculine nouns",
		additional = "See also [[:Category:English plurals in -ores with singular in -or|Category:English plurals in ''-ores'' with singular in ''-or'']].",
		matches_plural = "res",
		matches_lemma = "r",
	},
	{
		cat = "plurals in <<-tes>> with singular in <<-s>>",
		desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns",
		matches_plural = "tes",
		matches_lemma = "s",
	},
	{
		cat = "plurals in <<-ues>> with singular in <<-u>>",
		matches_plural = "ues",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "u"
		end,
	},
	{
		cat = "plurals in <<-ves>> with singular in <<-f>> or <<-fe>>",
		desc_suffix = ", mostly originating from native English formations",
		matches_plural = "ves",
		matches_lemma = "fe?",
	},
	{
		cat = "plurals with <<-xx->> instead of <<-x->>",
		desc_suffix = ", which are typically neologisms",
		matches_plural = "xxes",
		matches_lemma = function(pagename, lemma, stem)
			return is_regular_plural(stem .. "xes", lemma, "noun+")
		end,
	},
	{
		cat = "plurals in <<-es>> with singular in <<-is>>",
		desc_suffix = ", mostly originating from Greek feminine nouns, or analogous formations",
		matches_plural = "es",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "is"
		end,
	},
	{
		cat = "plurals in <<-es>> where <<-s>> is expected",
		desc_suffix = ", which chiefly occur in Early Modern English",
		matches_plural = "es",
		matches_lemma = function(pagename, lemma, stem)
			return is_regular_plural(stem .. "s", lemma, "noun+")
		end,
	},
	{
		cat = "plurals in <<-eis>> with singular in <<-is>>",
		desc_suffix = ", mostly originating from Greek feminine nouns",
		matches_plural = "eis",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "is"
		end,
	},
	{
		cat = "plurals in <<-s>> where <<-ies>> is expected",
		desc_suffix = ", which chiefly occur on an ad hoc basis",
		matches_plural = "ys",
		matches_lemma = function(pagename, lemma, stem)
			return is_regular_plural(stem .. "ies", lemma, "noun+")
		end,
	},
	{
		cat = "plurals in <<-'s>>",
		desc_suffix = ", mostly used where plurals ending in <<-s>> would appear strange or cause confusion",
		matches_plural = function(pagename, lemma)
			local stem = remove_possessive(pagename)
			return stem ~= pagename and stem or nil
		end,
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem
		end,
	},
	{
		cat = "plurals in <<-s>> where <<-es>> is expected",
		desc_suffix = ", which chiefly occur on an ad hoc basis",
		matches_plural = "s",
		matches_lemma = function(pagename, lemma, stem)
			return is_regular_plural(stem .. "es", lemma, "noun+")
		end,
	},
	{
		cat = "plurals in <<-ot>>",
		desc_suffix = ", mostly originating from Hebrew feminine nouns",
		matches_plural = "ot",
		matches_lemma = function(pagename, lemma, stem)
			return true
		end,
	},
	{
		cat = "plurals in <<-x>> with singular in <<-c>>, <<-ck>> or <<-k>>",
		desc_suffix = ", mostly as slang forms of plurals ending in <<-s>>",
		matches_plural = "x",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "c" or lemma == stem .. "ck" or lemma == stem .. "k"
		end,
	},
	{
		cat = "plurals in <<-x>>",
		desc_suffix = ", mostly originating from French masculine nouns",
		additional = "Generally these are formed by adding <<-x>> to a noun ending in <<-u>>; changing final <<-al>> or <<-ail>> to <<-aux>>; or changing final <<-el>> to <<-eaux>>.",
		matches_plural = "x",
		matches_lemma = "[lu]",
	},
	{
		cat = "plurals in <<-y>> with singular in <<-a>>",
		desc_suffix = ", mostly originating from Polish feminine nouns, Russian or Ukrainian masculine and feminine nouns, or Czech masculine nouns",
		matches_plural = "y",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem .. "a"
		end,
	},
	{
		cat = "plurals in <<-y>>",
		desc_suffix = ", mostly originating from Polish feminine nouns, Russian or Ukrainian masculine and feminine nouns, or Czech masculine nouns",
		additional = [==[The stem may be reduced (e.g. {{m|en|khokhol}} – {{m|en|khokhly}}). Plurals formed by replacing a final <<-a>> with <<-y>> are found in [[:Category:English plurals in -y with singular in -a|Category:English plurals in ''-y'' with singular in ''-a'']]]==],
		matches_plural = "y",
		matches_lemma = function(pagename, lemma, stem)
			if lemma == stem then
				return true
			end
			-- Try again with vowel reduction in the lemma.
			vowels = vowels or get_vowels()
			diacritics = diacritics or get_diacritics()
			lemma = ugsub(lemma, "([^" .. vowels .. "]" .. diacritics .. ")[" .. vowels .. "]" .. diacritics .. "([^" .. vowels .. "]+)$", "%1%2")
			return lemma == stem
		end,
	},
	{
		cat = "plurals in <<-z>>",
		desc_suffix = ", mostly as slang forms of plurals ending in <<-s>>",
		matches_plural = "z",
		matches_lemma = function(pagename, lemma, stem)
			return lemma == stem
		end,
	},
}

-- Compare one word of the page name with the corresponding word of the lemma.
--
-- `categories` is a list that receives at most one category name for this word
-- pair; `diff` is the number of differing (non-possessive) words counted so far.
-- Returns the updated count:
--   * identical words leave `diff` untouched;
--   * if one differing word has already been counted and this pair is not a
--     possessive pair, the count is bumped straight away and no category lookup
--     is performed;
--   * otherwise the count grows by 1, or by 0 when both words were possessive.
-- Regular plurals (per `is_regular_plural`) add no category. Anything else gets
-- the category of the first matching `irregular_plurals` entry, or
-- "miscellaneous irregular plurals" when no entry matches.
local function handle_word(pagename_word, lemma_word, categories, diff)
	if pagename_word == lemma_word then
		return diff
	end
	-- `poss` starts as whatever second value remove_possessive returns (nil if
	-- it only returns one value).
	local pagename_nonposs, poss = remove_possessive(pagename_word)
	if pagename_nonposs ~= pagename_word then
		local lemma_nonposs = remove_possessive(lemma_word)
		if lemma_nonposs ~= lemma_word then
			-- Both sides are possessive: compare the bare forms from here on.
			poss = true
			pagename_word = pagename_nonposs
			lemma_word = lemma_nonposs
		end
	end
	if diff == 1 and not poss then
		return diff + 1
	end
	-- Possessive pairs do not count towards the number of differing words.
	local step = poss and 0 or 1
	if is_regular_plural(pagename_word, lemma_word, "noun+") then
		return diff + step
	end
	local matched = false
	for _, entry in ipairs(irregular_plurals) do
		-- A string matcher is a pattern for the plural ending; a function
		-- matcher returns the stem itself (or a false value for no match).
		local plural_matcher = entry.matches_plural
		local stem
		if type(plural_matcher) == "string" then
			stem = umatch(pagename_word, "^(.-)" .. plural_matcher .. "$")
		else
			stem = plural_matcher(pagename_word, lemma_word)
		end
		if stem then
			local lemma_matcher = entry.matches_lemma
			local lemma_ok
			if type(lemma_matcher) == "string" then
				lemma_ok = umatch(lemma_word, lemma_matcher .. "$")
			else
				lemma_ok = lemma_matcher(pagename_word, lemma_word, stem)
			end
			if lemma_ok then
				-- Strip the <<...>> markers from the category name.
				local cat = entry.cat:gsub("<<(.-)>>", "%1")
				insert_if_not(categories, cat)
				matched = true
				break
			end
		end
	end
	if not matched then
		insert_if_not(categories, "miscellaneous irregular plurals")
	end
	return diff + step
end

-- Work out which irregular-plural categories apply to the plural at `pagename`
-- with respect to a single `lemma`, and add them to `categories`.
--
-- `lemma` is first reduced via get_plaintext and get_fragment. If it then
-- equals the page name nothing is added. Both strings are split on hyphens and
-- whitespace; when they do not split into the same number of pieces, only
-- "miscellaneous irregular plurals" is added. Otherwise the words are compared
-- pairwise with handle_word, collecting categories in a scratch list that is
-- merged into `categories` at the end.
--
-- `categories` is shared between all lemmas of the page, so this function only
-- ever appends to it; entries contributed by other lemmas are never removed.
local function handle_lemma(pagename, lemma, categories)
	lemma = get_fragment(get_plaintext(lemma))
	if lemma == pagename then
		return
	end
	local pagename_words, lemma_words = split(pagename, "([-%s])"), split(lemma, "([-%s])")
	-- Different number of words.
	if #pagename_words ~= #lemma_words then
		return insert_if_not(categories, "miscellaneous irregular plurals")
	end
	local lemma_categories, diff = {}, 0
	-- Step by 2: the separator pattern is a capture, so presumably `split`
	-- returns the separators at the even positions (TODO confirm against the
	-- `split` implementation).
	for i = 1, #pagename_words, 2 do
		diff = handle_word(pagename_words[i], lemma_words[i], lemma_categories, diff)
		-- If two (non-possessive) words differ, only add the miscellaneous
		-- category for this lemma. The categories gathered so far for this
		-- lemma live in `lemma_categories` and are simply dropped; the shared
		-- `categories` list must not be truncated, or results from previously
		-- processed lemmas would be lost depending on lemma order.
		if diff == 2 then
			insert_if_not(categories, "miscellaneous irregular plurals")
			return
		end
	end
	for _, cat in ipairs(lemma_categories) do
		insert_if_not(categories, cat)
	end
end

-- Build the category list for a plural form.
--
-- `data` must carry `pagename` and `lemmas` (a list of objects, each optionally
-- holding a `term`); if either field is missing nothing is returned. Otherwise
-- the result is a list whose first element is the string "multi", followed by
-- whatever handle_lemma adds for each lemma that has a term.
local function irregular_plural_categories(data)
	local pagename, lemmas = data.pagename, data.lemmas
	if not pagename then
		return
	end
	if not lemmas then
		return
	end
	local categories = {"multi"}
	for _, lemma_obj in ipairs(lemmas) do
		local term = lemma_obj.term
		-- Lemma objects without a term are skipped.
		if term then
			handle_lemma(pagename, term, categories)
		end
	end
	return categories
end

-- Category functions exported by this module, keyed by the name used to refer
-- to them.
local cat_functions = {}
-- This function is invoked for plurals by an entry in [[Module:form of/cats]].
cat_functions["en-irregular-plural-categories"] = irregular_plural_categories

-- The `irregular_plurals` table is exported alongside the category functions so
-- that the category handler in
-- [[Module:category tree/poscatboiler/data/lang-specific/en]] can access it.
local export = {
	cat_functions = cat_functions,
	irregular_plurals = irregular_plurals,
}
return export