مۆدیوول:form of/lang-data/en/functions
ڕواڵەت
"بەڵگەدارکردنی ئەم مۆدیوولە دەکرێ لە مۆدیوول:form of/lang-data/en/functions/docدا دروست بکرێ"
--[=[
This module contains lang-specific functions for English.
TODO:
* Handle alternative forms in umlaut more elegantly.
* Enable multiple categories (e.g. umlaut and plural in -n).
* Handle doubled consonants with Germanic plurals (e.g. "Yid" to "Yidden").
* Handle trivial consonant changes in umlaut plurals (e.g. "cow" to "kine").
]=]
local en_utilities_module = "Module:en-utilities"
local headword_data_module = "Module:headword/data"
local links_module = "Module:links"
local load_module = "Module:load"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local utilities_module = "Module:utilities"
local ipairs = ipairs
local require = require
local toNFD = mw.ustring.toNFD
local type = type
--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
-- Lazy loader: on first call, fetches `get_fragment` from the links module, rebinds this local to it so later calls
-- go straight to the target, then forwards the current arguments.
local function get_fragment(...)
	local target = require(links_module).get_fragment
	get_fragment = target
	return target(...)
end
-- Lazy loader: on first call, fetches `get_plaintext` from the utilities module, rebinds this local to it, then
-- forwards the current arguments.
local function get_plaintext(...)
	local target = require(utilities_module).get_plaintext
	get_plaintext = target
	return target(...)
end
-- Lazy loader: on first call, fetches `insertIfNot` from the table module, rebinds this local to it, then forwards
-- the current arguments.
local function insert_if_not(...)
	local target = require(table_module).insertIfNot
	insert_if_not = target
	return target(...)
end
-- Lazy loader: on first call, fetches `is_regular_plural` from the English utilities module, rebinds this local to
-- it, then forwards the current arguments.
local function is_regular_plural(...)
	local target = require(en_utilities_module).is_regular_plural
	is_regular_plural = target
	return target(...)
end
-- Lazy loader: on first call, fetches `load_data` from the load module, rebinds this local to it, then forwards the
-- current arguments.
local function load_data(...)
	local target = require(load_module).load_data
	load_data = target
	return target(...)
end
-- Lazy loader: on first call, fetches `pattern_escape` from the string utilities module, rebinds this local to it,
-- then forwards the current arguments.
local function pattern_escape(...)
	local target = require(string_utilities_module).pattern_escape
	pattern_escape = target
	return target(...)
end
-- Lazy loader: on first call, fetches `remove_possessive` from the English utilities module, rebinds this local to
-- it, then forwards the current arguments.
local function remove_possessive(...)
	local target = require(en_utilities_module).remove_possessive
	remove_possessive = target
	return target(...)
end
-- Lazy loader: on first call, fetches `split` from the string utilities module, rebinds this local to it, then
-- forwards the current arguments.
local function split(...)
	local target = require(string_utilities_module).split
	split = target
	return target(...)
end
-- Lazy loader: on first call, fetches `char` from the string utilities module, rebinds the local `u` to it, then
-- forwards the current arguments.
local function u(...)
	local target = require(string_utilities_module).char
	u = target
	return target(...)
end
-- Lazy loader: on first call, fetches `gsub` from the string utilities module, rebinds the local `ugsub` to it, then
-- forwards the current arguments.
local function ugsub(...)
	local target = require(string_utilities_module).gsub
	ugsub = target
	return target(...)
end
-- Lazy loader: on first call, fetches `lower` from the string utilities module, rebinds the local `ulower` to it,
-- then forwards the current arguments.
local function ulower(...)
	local target = require(string_utilities_module).lower
	ulower = target
	return target(...)
end
-- Lazy loader: on first call, fetches `match` from the string utilities module, rebinds the local `umatch` to it,
-- then forwards the current arguments.
local function umatch(...)
	local target = require(string_utilities_module).match
	umatch = target
	return target(...)
end
--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
-- Pattern fragment for an optional run of combining diacritics; nil until first requested.
local diacritics
-- One-shot loader for `diacritics`: takes `page.comb_chars.diacritics_all` from the headword data, appends "*" so
-- the fragment matches zero or more occurrences, caches the result in `diacritics`, and sets itself to nil.
-- Intended to be used as `diacritics or get_diacritics()`.
local function get_diacritics()
	local comb_chars = load_data(headword_data_module).page.comb_chars
	diacritics = comb_chars.diacritics_all .. "*"
	get_diacritics = nil
	return diacritics
end
-- Vowel characters, as exported by the English utilities module; nil until first requested. Used below as the
-- contents of a pattern character set.
local vowels
-- One-shot loader for `vowels`: caches the `vowels` field of the English utilities module and sets itself to nil.
-- Intended to be used as `vowels or get_vowels()`.
local function get_vowels()
	vowels = require(en_utilities_module).vowels
	get_vowels = nil
	return vowels
end
----------------------- Category functions -----------------------
-- Return `term` with its final consonant doubled when that consonant follows a single vowel (a pattern common with
-- Germanic suffixes); otherwise return `term` unchanged.
local function double_final_consonant(term)
	-- Split off a final lowercase consonant letter; "h" and "x" are included just in case.
	local head, last = term:match("^(.*)([bcdfghjklmnpqrstvxz])$")
	if not head then
		return term
	end
	diacritics = diacritics or get_diacritics()
	-- Work on the decomposed, lowercased head so that accented vowels appear as base letter + combining marks.
	local folded = ulower(toNFD(head))
	local vowel_set = "[" .. (vowels or get_vowels()) .. "]"
	-- Strip one trailing vowel (with its diacritics, plus any diacritics belonging to the preceding character);
	-- what is left is whatever comes before that vowel.
	local before_vowel = umatch(folded, "^(.-)" .. diacritics .. vowel_set .. diacritics .. "$")
	if not before_vowel then
		return term
	end
	-- The vowel counts as single when nothing precedes it, when only "y" precedes it, when it is preceded by
	-- vowel + "y" (so that "y" acts as a consonant), or when it is preceded by a non-vowel.
	local single_vowel = before_vowel == ""
		or before_vowel == "y"
		or umatch(before_vowel, vowel_set .. diacritics .. "y$")
		or umatch(before_vowel, "[^" .. vowels .. "]$")
	if single_vowel then
		return term .. last
	end
	return term
end
-- `matches_lemma` callback shared by the Germanic "-n" and "-r" categories. `stem` is the pagename minus the
-- suffix; returns true if `lemma` can plausibly yield that stem. `pagename` is accepted for signature
-- compatibility but is not used.
local function check_germanic_suffix(pagename, lemma, stem)
	-- The suffix is attached directly (e.g. "feathern") or, more usually, after an epenthetic "e" (e.g. "foxen").
	if stem == lemma or stem == lemma .. "e" then
		return true
	end
	-- A final -er may be reduced to -re before the suffix (e.g. "sistren").
	local base = lemma:match("^(.-)er$")
	if base and stem == base .. "re" then
		return true
	end
	-- Otherwise allow a doubled final consonant followed by the epenthetic "e".
	return stem == double_final_consonant(lemma) .. "e"
end
-- List of umlaut plurals. Each entry is of the form {SINGULAR, PLURAL}: a lemma ending in SINGULAR whose plural ends
-- in PLURAL is counted (hence [[dormouse]] plural [[dormice]] is counted). Both elements are Lua patterns and are
-- spliced unescaped into the patterns built by the "plurals with umlaut" matcher below, which tries the entries in
-- order and requires SINGULAR to start at a non-vowel/vowel boundary in the lemma.
local umlaut_plurals = {
-- [[mouse]] -> [[mice]], [[louse]] -> [[lice]], jocular [[house]] -> [[hice]], [[spouse]] -> [[spice]]
{"ouse", "ice"},
-- [[goose]] -> [[geese]], [[swoose]] -> [[sweese]], jocular [[moose]] -> [[meese]]
{"oose", "eese"},
-- -an -> -en (the vowel change seen in [[man]] -> [[men]]), followed by what look like variant spellings of the
-- same alternation (NOTE(review): presumably archaic/dialectal forms; confirm intended examples).
{"an", "en"},
{"ann", "enn"},
{"anne", "enne"},
-- -oot -> -eet (as in [[foot]] -> [[feet]]), plus a variant spelling with final -e.
{"oot", "eet"},
{"oote", "eete"},
-- -ooth -> -eeth (as in [[tooth]] -> [[teeth]]).
{"ooth", "eeth"},
-- NOTE(review): presumably for jocular/nonstandard plurals on the pattern of "hoof"; confirm.
{"oof", "eef"},
-- -other -> -ethren (as in [[brother]] -> [[brethren]]), plus variant spellings of the plural ending.
{"other", "ethren"},
{"other", "ethern"},
{"other", "etheren"},
-- -ow -> -i/-y/-ie/-ye and -ine/-yne (cf. "cow" to "kine"/"kyne"; the c -> k change is handled by the matcher,
-- not by these patterns).
{"ow", "[iy]e?"},
{"ow", "[iy]ne"},
}
--[=[
The key `cat` must be specified and is the name of the category following the language name. Suffixes enclosed in
double angle brackets, e.g. <<-ata>>, are italicized (as if written e.g. {{m|en||-ata}}) in the displayed title, but
not in the category name itself. The description of the category comes from the `description` field; if omitted, it is
constructed from the category by adding "English irregular" to the beginning and appending the value of `desc_suffix`
(if given) to the end. Suffixes enclosed in double angle brackets are italicized, as described above, and template
calls are permitted.
The key `matches_plural` must be specified and is either a string or a function. If a string, the string is a Lua
pattern that should match the end of the pagename, and the remainder becomes the stem passed to `matches_lemma` (see
below). If a function, it should accept two arguments, the pagename and the lemma (or more precisely, the words in the
pagename and lemma that differ, if there are multiple words), and should return the stem of the pagename (minus the
ending) if the pagename matches the ending, otherwise nil.
The key `matches_lemma` must be specified and is either a string or a function. If a string, the string is a Lua
pattern that should match the lemma. If a function, it should accept three arguments, the pagename and lemma as in
`matches_plural`, and the stem returned by `matches_plural` or extracted from the pagename and ending. It should return
a boolean indicating whether the lemma matches.
The key `additional`, if given, is additional text to include in the category description as displayed on the page
itself, but not in the summary of the category as displayed on other pages. For further information, see the
`additional` field in [[Module:category tree/poscatboiler/data/documentation]].
The key `breadcrumb`, if given, is the breadcrumb text. See [[Module:category tree/poscatboiler/data/documentation]].
If omitted, the breadcrumb is constructed from the category name by removing "plurals in" from the beginning of the
category name.
The key `sort_key`, if given, specifies the sort key for the category in its parent category
[[:Category:English irregular plurals]]. By default it is derived from the breadcrumb by removing an initial hyphen.
If a plural doesn't match any of the entries, it goes into [[:Category:English miscellaneous irregular plurals]]. Note
that before checking these entries, plurals that are the same as the singular are excluded (i.e. not considered
irregular), as are plurals formed from the singular by adding [[-s]], [[-es]], [[-'s]] or [[-ses]] (if the singular ends
in '-s'; cf. [[bus]] -> 'busses', [[dis]] -> 'disses'), or by replacing final [[-y]] with [[-ies]].
]=]
local irregular_plurals = {
{ -- siphon off "women" plurals 'English plurals in -men with singular in -man'
	cat = "plurals in <<-women>> with singular in <<-woman>>",
	additional = "Plurals formed by replacing a final <<-man>> with a final <<-men>> are found in [[:Category:English plurals in -men with singular in -man|Category:English plurals in ''-men'' with singular in ''-man'']].",
	matches_plural = "[Ww]omen",
	matches_lemma = function(pagename, lemma, stem)
		-- Take the "wom"/"Wom" of the matched ending from the pagename itself so that the case of the initial
		-- letter carries over to the expected singular.
		local wom = pagename:sub(-5, -3)
		local expected_singular = stem .. wom .. "an"
		return lemma == expected_singular
	end,
},
{ -- siphon off most of the "umlaut" plurals that will otherwise end up in 'English miscellaneous irregular plurals'
	cat = "plurals in <<-men>> with singular in <<-man>>",
	additional = "Plurals formed by replacing a final <<-woman>> with a final <<-women>> are found in [[:Category:English plurals in -women with singular in -woman|Category:English plurals in ''-women'' with singular in ''-woman'']].",
	matches_plural = "[Mm]en",
	matches_lemma = function(pagename, lemma, stem)
		-- Take the "m"/"M" of the matched ending from the pagename itself so that its case carries over to the
		-- expected singular.
		local m = pagename:sub(-3, -3)
		local expected_singular = stem .. m .. "an"
		return lemma == expected_singular
	end,
},
{
	cat = "plurals with umlaut",
	description = "{{{langname}}} irregular noun plurals that are formed via [[umlaut]], i.e. by changing the root vowel rather than adding a suffix.",
	additional = [==[See also:
* Plurals formed by replacing a final <<-man>> with <<-men>> are found in [[:Category:English plurals in -men with singular in -man|Category:English plurals in ''-men'' with singular in ''-man'']]
* Plurals formed by replacing a final <<-woman>> with a final <<-women>> are found in [[:Category:English plurals in -women with singular in -woman|Category:English plurals in ''-women'' with singular in ''-woman'']]]==],
	sort_key = "umlaut",
	-- Walk the `umlaut_plurals` list in order and return the lemma's stem for the first pair whose singular ending
	-- fits the lemma and whose plural ending fits the pagename; return nothing if no pair fits.
	matches_plural = function(pagename, lemma)
		-- The singular ending must begin where a non-vowel gives way to a vowel.
		local vowel_frontier = "%f[" .. (vowels or get_vowels()) .. "]"
		for i = 1, #umlaut_plurals do
			local singular_ending, plural_ending = umlaut_plurals[i][1], umlaut_plurals[i][2]
			local stem = umatch(lemma, "^(.-)" .. vowel_frontier .. singular_ending .. "$")
			if stem then
				local escaped = pattern_escape(stem)
				if umatch(pagename, "^" .. escaped .. plural_ending .. "$") then
					return stem
				end
				-- FIXME: this is really hacky ("cow" to "kyne").
				local with_k = escaped:gsub("c", "k")
				if umatch(pagename, "^" .. with_k .. plural_ending .. "$") then
					return stem
				end
			end
		end
	end,
	matches_lemma = function(pagename, lemma, stem)
		-- Nothing left to check: matches_plural() already validated the lemma.
		return true
	end,
},
{
cat = "plurals in <<-tia>> with singular in <<-s>>",
desc_suffix = ", mostly originating from Latin participles",
matches_plural = "tia",
matches_lemma = "s",
},
{
cat = "plurals in <<-ia>> with singular in <<-e>>",
desc_suffix = ", mostly originating from Latin neuter nouns",
matches_plural = "ia",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "e"
end,
},
{
cat = "plurals in <<-ia>> with singular in <<-i>> or <<-y>>",
desc_suffix = ", mostly originating from Greek neuter nouns",
matches_plural = "ia",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "i" or lemma == stem .. "y"
end,
},
{
cat = "plurals in <<-ina>> with singular in <<-en>>",
desc_suffix = ", mostly originating from Latin neuter nouns",
additional = [==[
* Plurals formed by replacing a final <<-inum>> with a final <<-ina>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-inon>> with a final <<-ina>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].]==],
matches_plural = "ina",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "en"
end,
},
{
cat = "plurals in <<-ra>> with singular in <<-s>>",
desc_suffix = ", mostly originating from Latin neuter nouns",
additional = [==[Sometimes the preceding vowel changes; e.g. <<-us>> commonly changes to <<-era>> or <<-ora>> in the plural.
* Plurals formed by replacing a final <<-rum>> with a final <<-ra>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-ron>> with a final <<-ra>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].]==],
matches_plural = "ra",
matches_lemma = function(pagename, lemma, stem)
return lemma:find("s$")
end,
},
{
cat = "plurals in <<-ata>> with singular in <<-a>> or <<-e>>",
desc_suffix = ", mostly originating from Ancient Greek neuter nouns in {{m|grc|-μᾰ}}",
matches_plural = "ata",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "a" or lemma == stem .. "e"
end,
},
{
cat = "plurals in <<-ata>> with singular in <<-as>>",
desc_suffix = ", mostly originating from Ancient Greek neuter nouns",
matches_plural = "ata",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "as"
end,
},
{
cat = "plurals in <<-au>>",
desc_suffix = ", mostly originating from Welsh",
matches_plural = "au",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem
end,
},
{
cat = "plurals in <<-a>> with singular in <<-an>>",
desc_suffix = ", mostly originating from Latin or Greek neuter nouns",
additional = [==[See also:
* Plurals formed by replacing a final <<-um>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-on>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].]==],
matches_plural = "a",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "an"
end,
},
{
cat = "plurals in <<-a>> with singular in <<-on>>",
desc_suffix = ", mostly originating from Greek or Latin nouns",
additional = [==[See also:
* Plurals formed by replacing a final <<-um>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-an>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -an|Category:English plurals in ''-a'' with singular in ''-an'']].]==],
matches_plural = "a",
matches_lemma = function(pagename, lemma, stem)
return umatch(toNFD(lemma), "^" .. pattern_escape(toNFD(stem)) .. "o" .. (diacritics or get_diacritics()) .. "n$")
end
},
{
cat = "plurals in <<-a>> with singular in <<-um>>",
desc_suffix = ", mostly originating from Latin nouns",
additional = [==[See also:
* Plurals formed by replacing a final <<-on>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].
* Plurals formed by replacing a final <<-an>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -an|Category:English plurals in ''-a'' with singular in ''-an'']].]==],
matches_plural = "a",
matches_lemma = function(pagename, lemma, stem)
return umatch(toNFD(lemma), "^" .. pattern_escape(toNFD(stem)) .. "u" .. (diacritics or get_diacritics()) .. "m$")
end
},
{
cat = "plurals in <<-a>>",
desc_suffix = ", mostly originating from Greek or Latin nouns",
additional = [==[See also:
* Plurals formed by replacing a final <<-um>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -um|Category:English plurals in ''-a'' with singular in ''-um'']].
* Plurals formed by replacing a final <<-on>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -on|Category:English plurals in ''-a'' with singular in ''-on'']].
* Plurals formed by replacing a final <<-an>> with a final <<-a>> are found in [[:Category:English plurals in -a with singular in -an|Category:English plurals in ''-a'' with singular in ''-an'']].]==],
matches_plural = "a",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem
end,
},
{
cat = "plurals in <<-ae>> with singular in <<-a>>",
desc_suffix = ", mostly originating from Latin feminine nouns",
additional = "The <<-ae>> can also be written as a ligature <<-æ>>.",
matches_plural = function(pagename, lemma)
return pagename:match("^(.*)ae$") or pagename:match("^(.*)æ$")
end,
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "a"
end,
},
{
cat = "plurals in <<-ae>> with singular in <<-e>>",
desc_suffix = ", mostly originating from Greek feminine nouns",
additional = "The <<-ae>> can also be written as a ligature <<-æ>>.",
matches_plural = function(pagename, lemma)
return pagename:match("^(.*)ae$") or pagename:match("^(.*)æ$")
end,
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "e"
end,
},
{
cat = "plurals in <<-ae>>",
desc_suffix = ", mostly originating from Latin and Greek feminine nouns",
additional = [==[The <<-ae>> can also be written as a ligature <<-æ>>. See also:
* [[:Category:English plurals in -ae with singular in -a|Category:English plurals in ''-ae'' with singular in ''-a'']]
* [[:Category:English plurals in -ae with singular in -e|Category:English plurals in ''-ae'' with singular in ''-e'']]]==],
matches_plural = function(pagename, lemma)
return pagename:match("^(.*)ae$") or pagename:match("^(.*)æ$")
end,
matches_lemma = function(pagename, lemma, stem)
return lemma == stem
end,
},
{ -- siphon off most of the "-people" plurals that will otherwise end up in 'English miscellaneous irregular plurals'
cat = "plurals in <<-people>> with singular in <<-person>>",
matches_plural = "people",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "person"
end,
},
{
	cat = "plurals in <<-e>> with singular in <<-a>> or <<-ia>>",
	desc_suffix = ", mostly originating from Italian feminine nouns",
	matches_plural = "e",
	matches_lemma = function(pagename, lemma, stem)
		-- Plain replacement of -a or -ia by -e.
		if lemma == stem .. "a" or lemma == stem .. "ia" then
			return true
		end
		-- A stem in -ch/-gh may correspond to a singular in -ca/-ga: drop the "h" before comparing.
		return stem:match("[cg]h$") and lemma == stem:sub(1, -2) .. "a"
	end,
},
{
cat = "plurals in <<-e>>",
desc_suffix = ", mostly originating from German masculine or neuter nouns",
additional = "These are formed by adding <<-e>>. See also [[:Category:English plurals in -e with singular in -a or -ia|Category:English plurals in ''-e'' with singular in ''-a'' or ''-ia'']].",
matches_plural = "e",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem or double_final_consonant(lemma) == stem
end,
},
{
cat = "plurals in <<-oth>>",
desc_suffix = ", mostly originating from Hebrew feminine nouns",
matches_plural = "oth",
matches_lemma = function(pagename, lemma, stem)
return true
end,
},
{
cat = "plurals in <<-ai>> with singular in <<-a>> or <<-e>>",
desc_suffix = ", mostly originating from Greek feminine nouns",
matches_plural = "ai",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "a" or lemma == stem .. "e"
end,
},
{
cat = "plurals in <<-ai>> with singular in <<-es>> or <<-is>>",
desc_suffix = ", mostly originating from Greek masculine nouns",
matches_plural = "ai",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "es" or lemma == stem .. "is"
end,
},
{
cat = "plurals in <<-oi>> with singular in <<-os>>",
desc_suffix = ", mostly originating from Greek masculine nouns",
matches_plural = "oi",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "os"
end,
},
{
	cat = "plurals in <<-i>> with singular in <<-os>>",
	desc_suffix = ", mostly originating from Greek or Latin nouns",
	additional = "Plurals formed by replacing a final <<-us>> with a final <<-i>> are found in [[:Category:English plurals in -i with singular in -us|Category:English plurals in ''-i'' with singular in ''-us'']].",
	matches_plural = "i?i",
	matches_lemma = function(pagename, lemma, stem)
		if lemma == stem .. "os" then
			return true
		end
		-- A singular in -ios is accepted only when the plural is something other than the stem plus a single "i".
		return lemma == stem .. "ios" and pagename ~= stem .. "i"
	end,
},
{
	cat = "plurals in <<-i>> with singular in <<-us>>",
	desc_suffix = ", mostly originating from Latin nouns",
	additional = "Plurals formed by replacing a final <<-os>> with a final <<-i>> are found in [[:Category:English plurals in -i with singular in -os|Category:English plurals in ''-i'' with singular in ''-os'']].",
	matches_plural = "i?i",
	matches_lemma = function(pagename, lemma, stem)
		if lemma == stem .. "us" then
			return true
		end
		-- A singular in -ius is accepted only when the plural is something other than the stem plus a single "i".
		return lemma == stem .. "ius" and pagename ~= stem .. "i"
	end,
},
{
	cat = "plurals in <<-i>> with singular in <<-a>> or <<-ia>>",
	desc_suffix = ", mostly originating from Russian or Ukrainian masculine and feminine nouns or Italian masculine nouns",
	matches_plural = "i",
	matches_lemma = function(pagename, lemma, stem)
		-- Plain replacement of -a or -ia by -i.
		if lemma == stem .. "a" or lemma == stem .. "ia" then
			return true
		end
		-- A stem in -ch/-gh may correspond to a singular in -ca/-ga: drop the "h" before comparing.
		return stem:match("[cg]h$") and lemma == stem:sub(1, -2) .. "a"
	end,
},
{
cat = "plurals in <<-i>> with singular in <<-e>>",
desc_suffix = ", mostly originating from Italian masculine nouns",
matches_plural = "i",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "e"
end,
},
{
	cat = "plurals in <<-i>> with singular in <<-o>> or <<-io>>",
	desc_suffix = ", mostly originating from Italian masculine nouns",
	matches_plural = "i",
	matches_lemma = function(pagename, lemma, stem)
		-- Plain replacement of -o or -io by -i.
		if lemma == stem .. "o" or lemma == stem .. "io" then
			return true
		end
		-- A stem in -ch/-gh may correspond to a singular in -co/-go: drop the "h" before comparing.
		return stem:match("[cg]h$") and lemma == stem:sub(1, -2) .. "o"
	end,
},
{
cat = "plurals in <<-i>>",
additional = [==[These are formed by adding <<i>>, or changing <<-a>> to <<-i>>. See also:
* [[:Category:English plurals in -i with singular in -us|Category:English plurals in ''-i'' with singular in ''-us'']]
* [[:Category:English plurals in -i with singular in -os|Category:English plurals in ''-i'' with singular in ''-os'']]
* [[:Category:English plurals in -i with singular in -o or -io|Category:English plurals in ''-i'' with singular in ''-o'' or ''-io'']]
* [[:Category:English plurals in -i with singular in -e|Category:English plurals in ''-i'' with singular in ''-e'']]]==],
matches_plural = "i",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem
end,
},
{ -- siphon off most of the "-sful" plurals that will otherwise end up in 'English miscellaneous irregular plurals'
	cat = "plurals in <<-sful>> with singular in <<-ful>>",
	additional = "This includes examples such as {{m|en|teaspoonful}}, plural {{m|en|teaspoonsful}}. Generally "
		.. "these refer to specific measures. Note that not all nouns in <<-ful>> pluralize this way; e.g. the "
		.. "plural of {{m|en|handful}} is normally {{m|en|handfuls}} (but {{m|en|handsful}} is possible, if rare).",
	matches_plural = "sful",
	matches_lemma = function(pagename, lemma, stem)
		-- Simple case: the first element just takes -s.
		if lemma == stem .. "ful" then
			return true
		end
		-- First element pluralized with -es:
		-- [[boxesful]], [[brushesful]], [[busesful]], [[classesful]], [[dishesful]], [[glassesful]], etc.
		if stem:find("e$") and lemma == stem:gsub("e$", "") .. "ful" then
			return true
		end
		-- First element pluralized with -y -> -ies:
		-- [[bakeriesful]], [[belliesful]], [[galleriesful]], [[librariesful]], [[pantriesful]], etc.
		return stem:find("ie$") and lemma == stem:gsub("ie$", "y") .. "ful"
	end,
},
{
cat = "plurals in <<-im>>",
desc_suffix = ", mostly originating from Hebrew masculine nouns",
additional = "Generally these are formed by simply adding <<-im>>, or <<-m>> if the singular ends in <<-i>> ({{m|en|illui}} – {{m|en|illuim}}; but cf. {{m|en|goiim}}). Some changes that may occur are <<-e->> to <<-a->> or vice versa ({{m|en|heder}} – {{m|en|hadarim}}; {{m|en|gaon}} – {{m|en|geonim}}), <<-f>> to <<-v->> ({{m|en|ganef}} – {{m|en|ganevim}}), and <<-s>> to <<-t->> ({{m|en|balabos}} – {{m|en|balabatim}}).",
matches_plural = "im",
matches_lemma = function(pagename, lemma, stem)
return true
end,
},
{
	cat = "plurals in <<-children>> with singular in <<-child>>",
	matches_plural = "[Cc]hildren",
	matches_lemma = function(pagename, lemma, stem)
		-- Take "child"/"Child" from the matched ending of the pagename itself (everything except the final "ren")
		-- so that the case of the initial letter carries over to the expected singular.
		local child = pagename:sub(-8, -4)
		local expected_singular = stem .. child
		return lemma == expected_singular
	end,
},
{
cat = "plurals in <<-in>>",
desc_suffix = ", mostly originating from Hebrew and Arabic masculine nouns",
matches_plural = "in",
matches_lemma = function(pagename, lemma, stem)
return true
end,
},
{
cat = "plurals in <<-n>>",
desc_suffix = ", mostly originating from Germanic nouns",
additional = [==[See also:
* Plurals formed by replacing a final <<-man>> with <<-men>> are found in [[:Category:English plurals in -men with singular in -man|Category:English plurals in ''-men'' with singular in ''-man'']]
* Plurals formed by replacing a final <<-woman>> with a final <<-women>> are found in [[:Category:English plurals in -women with singular in -woman|Category:English plurals in ''-women'' with singular in ''-woman'']]
* Plurals formed by replacing a final <<-child>> with <<-children>> are found in [[:Category:English plurals in -children with singular in -child|Category:English plurals in ''-children'' with singular in ''-child'']]]==],
matches_plural = "n",
matches_lemma = check_germanic_suffix,
},
{
cat = "plurals in <<-ar>>",
desc_suffix = ", mostly originating from North Germanic nouns",
matches_plural = "ar",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem
end,
},
{
cat = "plurals in <<-ir>>",
desc_suffix = ", mostly originating from North Germanic nouns",
matches_plural = "ir",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem
end,
},
{
cat = "plurals in <<-or>> with singular in <<-a>>",
desc_suffix = ", mostly originating from Swedish nouns",
matches_plural = "or",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "a"
end,
},
{
cat = "plurals in <<-r>>",
desc_suffix = ", mostly originating from Germanic nouns",
matches_plural = "r",
matches_lemma = check_germanic_suffix,
},
{
cat = "plurals in <<-bes>> with singular in <<-bs>>",
desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns",
matches_plural = "bes",
matches_lemma = "bs",
},
{
cat = "plurals in <<-ces>> with singular in <<-x>>",
desc_suffix = ", mostly originating from Latin masculine or feminine nouns",
additional = "Generally these are formed by changing a final <<-x>> into <<-ces>> or a final <<-ex>> into <<-ices>>.",
matches_plural = "ces",
matches_lemma = "x",
},
{
cat = "plurals in <<-des>> with singular in <<-d>>",
desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns, or Spanish feminine nouns",
matches_plural = "des",
matches_lemma = "d",
},
{
cat = "plurals in <<-des>> with singular in <<-s>>",
desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns",
matches_plural = "des",
matches_lemma = "s",
},
{
cat = "plurals in <<-ges>> with singular in <<-x>>",
desc_suffix = ", mostly originating from Greek masculine or feminine nouns",
matches_plural = "ges",
matches_lemma = "x",
},
{
cat = "plurals in <<-ies>> with singular in <<-ey>>",
matches_plural = "ies",
matches_lemma = "ey",
},
{
cat = "plurals in <<-ies>> with singular in <<-i>>",
matches_plural = "ies",
matches_lemma = "i",
},
{
cat = "plurals in <<-kes>> with singular in <<-x>>",
desc_suffix = ", mostly originating from Greek masculine or feminine nouns",
matches_plural = "kes",
matches_lemma = "x",
},
{
cat = "plurals in <<-ines>> with singular in <<-o>>",
desc_suffix = ", mostly originating from Latin masculine or feminine nouns",
matches_plural = "ines",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "o"
end,
},
{
cat = "plurals in <<-ones>> with singular in <<-o>>",
desc_suffix = ", mostly originating from Latin masculine or feminine nouns",
matches_plural = "ones",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "o"
end,
},
{
-- cf. [[eikon]] pl. [[eikones]]
cat = "plurals in <<-ones>> with singular in <<-on>>",
desc_suffix = ", mostly originating from Greek or Spanish masculine nouns",
matches_plural = "ones",
matches_lemma = function(pagename, lemma, stem)
return umatch(toNFD(lemma), "^" .. pattern_escape(toNFD(stem)) .. "o" .. (diacritics or get_diacritics()) .. "n$")
end
},
{
cat = "plurals in <<-oes>> with singular in <<-o>>",
matches_plural = "oes",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "o"
end,
},
{
-- cf. [[levator]] pl. [[levatores]]
cat = "plurals in <<-ores>> with singular in <<-or>>",
desc_suffix = ", mostly originating from Latin masculine nouns",
matches_plural = "ores",
matches_lemma = function(pagename, lemma, stem)
return umatch(toNFD(lemma), "^" .. pattern_escape(toNFD(stem)) .. "o" .. (diacritics or get_diacritics()) .. "r$")
end,
},
{
cat = "plurals in <<-pes>> with singular in <<-ps>>",
desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns",
matches_plural = "pes",
matches_lemma = "ps",
},
{
cat = "plurals in <<-res>> with singular in <<-r>>",
desc_suffix = ", mostly originating from Greek, Latin, Portuguese, or Spanish masculine nouns",
additional = "See also [[:Category:English plurals in -ores with singular in -or|Category:English plurals in ''-ores'' with singular in ''-or'']].",
matches_plural = "res",
matches_lemma = "r",
},
{
cat = "plurals in <<-tes>> with singular in <<-s>>",
desc_suffix = ", mostly originating from Latin or Greek masculine or feminine nouns",
matches_plural = "tes",
matches_lemma = "s",
},
{
cat = "plurals in <<-ues>> with singular in <<-u>>",
matches_plural = "ues",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "u"
end,
},
{
cat = "plurals in <<-ves>> with singular in <<-f>> or <<-fe>>",
desc_suffix = ", mostly originating from native English formations",
matches_plural = "ves",
matches_lemma = "fe?",
},
{
cat = "plurals with <<-xx->> instead of <<-x->>",
desc_suffix = ", which are typically neologisms",
matches_plural = "xxes",
matches_lemma = function(pagename, lemma, stem)
return is_regular_plural(stem .. "xes", lemma, "noun+")
end,
},
{
cat = "plurals in <<-es>> with singular in <<-is>>",
desc_suffix = ", mostly originating from Greek feminine nouns, or analogous formations",
matches_plural = "es",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "is"
end,
},
{
cat = "plurals in <<-es>> where <<-s>> is expected",
desc_suffix = ", which chiefly occur in Early Modern English",
matches_plural = "es",
matches_lemma = function(pagename, lemma, stem)
return is_regular_plural(stem .. "s", lemma, "noun+")
end,
},
{
cat = "plurals in <<-eis>> with singular in <<-is>>",
desc_suffix = ", mostly originating from Greek feminine nouns",
matches_plural = "eis",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "is"
end,
},
{
cat = "plurals in <<-s>> where <<-ies>> is expected",
desc_suffix = ", which chiefly occur on an ad hoc basis",
matches_plural = "ys",
matches_lemma = function(pagename, lemma, stem)
return is_regular_plural(stem .. "ies", lemma, "noun+")
end,
},
{
	cat = "plurals in <<-'s>>",
	desc_suffix = ", mostly used where plurals ending in <<-s>> would appear strange or cause confusion",
	matches_plural = function(pagename, lemma)
		-- Only the first value returned by remove_possessive() is used; if it equals the pagename, nothing was
		-- removed and this category does not apply.
		local bare = remove_possessive(pagename)
		if bare == pagename then
			return nil
		end
		return bare or nil
	end,
	matches_lemma = function(pagename, lemma, stem)
		-- The singular must be exactly what is left after the ending was removed.
		return stem == lemma
	end,
},
{
cat = "plurals in <<-s>> where <<-es>> is expected",
desc_suffix = ", which chiefly occur on an ad hoc basis",
matches_plural = "s",
matches_lemma = function(pagename, lemma, stem)
return is_regular_plural(stem .. "es", lemma, "noun+")
end,
},
{
cat = "plurals in <<-ot>>",
desc_suffix = ", mostly originating from Hebrew feminine nouns",
matches_plural = "ot",
matches_lemma = function(pagename, lemma, stem)
return true
end,
},
{
cat = "plurals in <<-x>> with singular in <<-c>>, <<-ck>> or <<-k>>",
desc_suffix = ", mostly as slang forms of plurals ending in <<-s>>",
matches_plural = "x",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "c" or lemma == stem .. "ck" or lemma == stem .. "k"
end,
},
{
cat = "plurals in <<-x>>",
desc_suffix = ", mostly originating from French masculine nouns",
additional = "Generally these are formed by adding <<-x>> to a noun ending in <<-u>>; changing final <<-al>> or <<-ail>> to <<-aux>>; or changing final <<-el>> to <<-eaux>>.",
matches_plural = "x",
matches_lemma = "[lu]",
},
{
cat = "plurals in <<-y>> with singular in <<-a>>",
desc_suffix = ", mostly originating from Polish feminine nouns, Russian or Ukrainian masculine and feminine nouns, or Czech masculine nouns",
matches_plural = "y",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem .. "a"
end,
},
{
cat = "plurals in <<-y>>",
desc_suffix = ", mostly originating from Polish feminine nouns, Russian or Ukrainian masculine and feminine nouns, or Czech masculine nouns",
additional = [==[The stem may be reduced (e.g. {{m|en|khokhol}} – {{m|en|khokhly}}). Plurals formed by replacing a final <<-a>> with <<-y>> are found in [[:Category:English plurals in -y with singular in -a|Category:English plurals in ''-y'' with singular in ''-a'']]]==],
matches_plural = "y",
matches_lemma = function(pagename, lemma, stem)
if lemma == stem then
return true
end
-- Try again with vowel reduction in the lemma.
vowels = vowels or get_vowels()
diacritics = diacritics or get_diacritics()
lemma = ugsub(lemma, "([^" .. vowels .. "]" .. diacritics .. ")[" .. vowels .. "]" .. diacritics .. "([^" .. vowels .. "]+)$", "%1%2")
return lemma == stem
end,
},
{
cat = "plurals in <<-z>>",
desc_suffix = ", mostly as slang forms of plurals ending in <<-s>>",
matches_plural = "z",
matches_lemma = function(pagename, lemma, stem)
return lemma == stem
end,
},
}
-- Compare one word of the pagename against the corresponding word of the lemma.
--
-- `diff` counts the words seen so far that differ and are not possessive; the
-- updated count is returned. When the pair is not a regular plural, a category
-- name is appended to `categories`: the first matching entry of
-- `irregular_plurals`, or "miscellaneous irregular plurals" if none matches.
local function handle_word(pagename_word, lemma_word, categories, diff)
	-- Identical words say nothing about how the plural was formed.
	if pagename_word == lemma_word then
		return diff
	end
	-- NOTE(review): `is_poss` starts out as whatever second value
	-- remove_possessive returns (not visible from here); it is forced to true
	-- only when both words carry a possessive ending.
	local bare_pagename, is_poss = remove_possessive(pagename_word)
	if bare_pagename ~= pagename_word then
		local bare_lemma = remove_possessive(lemma_word)
		if bare_lemma ~= lemma_word then
			-- Both are possessive: compare the words without the ending.
			is_poss = true
			pagename_word = bare_pagename
			lemma_word = bare_lemma
		end
	end
	-- A second differing non-possessive word: just bump the counter and leave
	-- it to the caller to react.
	if diff == 1 and not is_poss then
		return diff + 1
	end
	-- Possessive pairs never count towards `diff`.
	local increment
	if is_poss then
		increment = 0
	else
		increment = 1
	end
	if is_regular_plural(pagename_word, lemma_word, "noun+") then
		return diff + increment
	end
	-- Walk the irregular plural specs in order; the first one whose plural
	-- test and lemma test both succeed decides the category.
	local found = false
	for _, spec in ipairs(irregular_plurals) do
		local plural_test = spec.matches_plural
		local stem
		if type(plural_test) == "string" then
			-- String specs are an ending; whatever precedes it is the stem.
			stem = umatch(pagename_word, "^(.-)" .. plural_test .. "$")
		else
			stem = plural_test(pagename_word, lemma_word)
		end
		if stem then
			local lemma_test = spec.matches_lemma
			local lemma_ok
			if type(lemma_test) == "string" then
				-- String specs are a pattern anchored at the end of the lemma.
				lemma_ok = umatch(lemma_word, lemma_test .. "$")
			else
				lemma_ok = lemma_test(pagename_word, lemma_word, stem)
			end
			if lemma_ok then
				-- Strip the <<...>> markers from the category name. The
				-- parentheses drop gsub's second return value (the count).
				insert_if_not(categories, (spec.cat:gsub("<<(.-)>>", "%1")))
				found = true
				break
			end
		end
	end
	if not found then
		insert_if_not(categories, "miscellaneous irregular plurals")
	end
	return diff + increment
end
-- Compare the pagename against a single lemma and add the resulting
-- irregular-plural categories to `categories`.
--
-- `lemma` is first passed through get_plaintext and get_fragment. Nothing is
-- added when the result equals `pagename`. Categories already present in
-- `categories` (e.g. from previously processed lemmas) are never removed.
local function handle_lemma(pagename, lemma, categories)
	lemma = get_fragment(get_plaintext(lemma))
	if lemma == pagename then
		return
	end
	-- Split on hyphens and whitespace. The separator is captured and the loop
	-- below steps by 2, so the words are presumably at the odd indices with the
	-- separators in between (depends on `split`, defined elsewhere).
	local pagename_words, lemma_words = split(pagename, "([-%s])"), split(lemma, "([-%s])")
	-- Different number of words.
	if #pagename_words ~= #lemma_words then
		return insert_if_not(categories, "miscellaneous irregular plurals")
	end
	-- Collect this lemma's categories separately, so they can be discarded as a
	-- whole without touching what other lemmas have contributed.
	local lemma_categories, diff = {}, 0
	for i = 1, #pagename_words, 2 do
		diff = handle_word(pagename_words[i], lemma_words[i], lemma_categories, diff)
		-- If two (non-possessive) words differ, this lemma only contributes the
		-- miscellaneous category; `lemma_categories` is dropped. The shared
		-- `categories` list is only appended to, so the outcome does not depend
		-- on the order in which lemmas are processed.
		if diff == 2 then
			insert_if_not(categories, "miscellaneous irregular plurals")
			return
		end
	end
	for _, cat in ipairs(lemma_categories) do
		insert_if_not(categories, cat)
	end
end
-- Build the category spec for an English plural form.
--
-- `data` must provide `pagename` and `lemmas`; if either is missing nothing is
-- returned. Otherwise the result is a list whose first element is the string
-- "multi", followed by whatever categories handle_lemma adds for each lemma
-- object that has a `term`.
local function irregular_plural_categories(data)
	local pagename = data.pagename
	if not pagename then
		return
	end
	local lemmas = data.lemmas
	if not lemmas then
		return
	end
	local categories = {"multi"}
	for _, lemma_obj in ipairs(lemmas) do
		local term = lemma_obj.term
		-- Lemma objects without a term are skipped.
		if term then
			handle_lemma(pagename, term, categories)
		end
	end
	return categories
end
-- Category functions provided by this module, keyed by name.
local cat_functions = {}
-- This function is invoked for plurals by an entry in [[Module:form of/cats]].
cat_functions["en-irregular-plural-categories"] = irregular_plural_categories

-- The irregular_plurals structure is returned alongside the category functions
-- so that the category handler in
-- [[Module:category tree/poscatboiler/data/lang-specific/en]] can access it.
local export = {}
export.cat_functions = cat_functions
export.irregular_plurals = irregular_plurals
return export