diff --git a/docs/en/index.rst b/docs/en/index.rst index 29866f4d..8cd76827 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -38,6 +38,7 @@ you want to create an inflector for to the ``createForLanguage()`` method: The supported languages are as follows: +- ``Language::DUTCH`` - ``Language::ENGLISH`` - ``Language::FRENCH`` - ``Language::NORWEGIAN_BOKMAL`` diff --git a/lib/Doctrine/Inflector/InflectorFactory.php b/lib/Doctrine/Inflector/InflectorFactory.php index a0740a74..d4053ed3 100644 --- a/lib/Doctrine/Inflector/InflectorFactory.php +++ b/lib/Doctrine/Inflector/InflectorFactory.php @@ -4,6 +4,7 @@ namespace Doctrine\Inflector; +use Doctrine\Inflector\Rules\Dutch; use Doctrine\Inflector\Rules\English; use Doctrine\Inflector\Rules\French; use Doctrine\Inflector\Rules\NorwegianBokmal; @@ -24,6 +25,9 @@ public static function create(): LanguageInflectorFactory public static function createForLanguage(string $language): LanguageInflectorFactory { switch ($language) { + case Language::DUTCH: + return new Dutch\InflectorFactory(); + case Language::ENGLISH: return new English\InflectorFactory(); diff --git a/lib/Doctrine/Inflector/Language.php b/lib/Doctrine/Inflector/Language.php index 78c3cc75..7ab66f78 100644 --- a/lib/Doctrine/Inflector/Language.php +++ b/lib/Doctrine/Inflector/Language.php @@ -6,6 +6,7 @@ final class Language { + public const DUTCH = 'dutch'; public const ENGLISH = 'english'; public const FRENCH = 'french'; public const NORWEGIAN_BOKMAL = 'norwegian-bokmal'; diff --git a/lib/Doctrine/Inflector/Rules/Dutch/Inflectible.php b/lib/Doctrine/Inflector/Rules/Dutch/Inflectible.php new file mode 100644 index 00000000..04f96003 --- /dev/null +++ b/lib/Doctrine/Inflector/Rules/Dutch/Inflectible.php @@ -0,0 +1,169 @@ +medeklinker = '(' . $this->missingFromWiki . '|' . $this->plofKlank . '|' . $this->wrijfKlank . '|' . $this->neusKlank . '|' . $this->vloeiKlank . '|' . $this->glijKlank . '|' . $this->affricate . 
')'; + * $this->medeklinker = '((c|h|p|q|x|y|z)|(p|t|k|b|d)|(f|s|ch|sj|v|z|g|j)|(m|n|ng)|(l|r)|(j|w)|(ts|zz|tsj|g))'; + * } + */ +class Inflectible +{ + /** + * @return iterable Transformations that rewrite Dutch plural endings to their singular form, yielded in the order written below. + */ + public static function getSingular(): iterable + { + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Klinkerverandering + yield new Transformation(new Pattern('()heden$'), '\1heid'); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Beroepen_eindigend_op_-man + yield new Transformation(new Pattern('()mannen$'), '\1man'); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Latijnse_meervoudsvormen + yield new Transformation(new Pattern('()ices$'), '\1ex'); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Stapelmeervoud + yield new Transformation(new Pattern('^(ei|gemoed|goed|hoen|kind|lied|rad|rund)eren$'), '\1'); + + // http://nl.wikipedia.org/wiki/Nederlandse_grammatica + yield new Transformation(new Pattern('()ijen$'), '\1ij'); + + yield new Transformation(new Pattern('()ieen$'), '\1ie'); // ën; NOTE(review): the pattern matches ASCII "ieen" only, presumably "-ieën" was intended, confirm + + yield new Transformation(new Pattern('()((a|e|i|o|u|ij))s$'), '\1\2'); + + yield new Transformation(new Pattern('()((s)s)en$'), '\1s'); + + yield new Transformation(new Pattern('()((c|h|p|q|x|y|z)|(p|t|k|b|d)|(f|s|ch|sj|v|z|g|j)|(m|n|ng)|(l|r)|(j|w)|(ts|zz|tsj|g))en$'), '\1\2'); + } + + /** + * @return iterable Transformations that rewrite Dutch singular endings to their plural form, yielded in the order written below. + */ + public static function getPlural(): iterable + { + // @todo already in plural (?) 
+ // @todo refine + yield new Transformation(new Pattern('()(e)(s)$'), '\1\2\3\3en'); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Klinkerverandering + yield new Transformation(new Pattern('()heid$'), '\1heden'); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Beroepen_eindigend_op_-man + yield new Transformation(new Pattern('()man$'), '\1mannen'); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Latijnse_meervoudsvormen + yield new Transformation(new Pattern('()ix$'), '\1ices'); + + yield new Transformation(new Pattern('()ex$'), '\1ices'); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Stapelmeervoud + yield new Transformation(new Pattern('^(ei|gemoed|goed|hoen|kind|lied|rad|rund)$'), '\1eren'); + + // http://nl.wikipedia.org/wiki/Nederlandse_grammatica + yield new Transformation(new Pattern('()ij$'), '\1ijen'); + + yield new Transformation(new Pattern('()orie$'), '\1orieen'); // ën, klemtoon (Dutch for "stress"); NOTE(review): the replacement emits ASCII "orieen", presumably "-orieën" was intended, confirm + + yield new Transformation(new Pattern('()io$'), '\1io\'s'); + + yield new Transformation(new Pattern('()(a|e|i|o|u|ij)$'), '\1\2s'); + + yield new Transformation(new Pattern('()(((c|h|p|q|x|y|z)|(p|t|k|b|d)|(f|s|ch|sj|v|z|g|j)|(m|n|ng)|(l|r)|(j|w)|(ts|zz|tsj|g))e((c|h|p|q|x|y|z)|(p|t|k|b|d)|(f|s|ch|sj|v|z|g|j)|(m|n|ng)|(l|r)|(j|w)|(ts|zz|tsj|g)))$'), '\1\2s'); + + yield new Transformation(new Pattern('()(((c|h|p|q|x|y|z)|(p|t|k|b|d)|(f|s|ch|sj|v|z|g|j)|(m|n|ng)|(l|r)|(j|w)|(ts|zz|tsj|g))(u|i|e|a|o)s)$'), '\1\2sen'); + + yield new Transformation(new Pattern('()(((c|h|p|q|x|y|z)|(p|t|k|b|d)|(f|s|ch|sj|v|z|g|j)|(m|n|ng)|(l|r)|(j|w)|(ts|zz|tsj|g))s)$'), '\1\2en'); + + yield new Transformation(new Pattern('()s$'), '\1zen'); + + yield new Transformation(new Pattern('()((c|h|p|q|x|y|z)|(p|t|k|b|d)|(f|s|ch|sj|v|z|g|j)|(m|n|ng)|(l|r)|(j|w)|(ts|zz|tsj|g))$'), '\1\2en'); + } + + /** + * @return iterable Substitutions pairing a singular word (first argument) with its irregular plural (second argument). + */ + public static function getIrregular(): iterable + { + // http://nl.wikipedia.org/wiki/Klemtoon + yield new 
Word('olie'), new Word('oliën')); + + yield new Substitution(new Word('industrie'), new Word('industrieën')); + + yield new Substitution(new Word('idee'), new Word('ideeën')); + + // @todo: the 3 examples above could maybe be compacted into a rule + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Klinkerverandering + yield new Substitution(new Word('lid'), new Word('leden')); + + yield new Substitution(new Word('smid'), new Word('smeden')); + + // @todo: the 2 examples above might be compacted into a rule + // @todo: also compounds, e.g. ooglid / oogleden + + yield new Substitution(new Word('schip'), new Word('schepen')); + + yield new Substitution(new Word('stad'), new Word('steden')); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Stapelmeervoud + yield new Substitution(new Word('gelid'), new Word('gelederen')); + + yield new Substitution(new Word('kalf'), new Word('kalveren')); + + yield new Substitution(new Word('lam'), new Word('lammeren')); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Onregelmatige_meervoudsvorming + yield new Substitution(new Word('koe'), new Word('koeien')); + + yield new Substitution(new Word('vlo'), new Word('vlooien')); + + yield new Substitution(new Word('leerrede'), new Word('leerredenen')); + + yield new Substitution(new Word('lende'), new Word('lendenen')); + + yield new Substitution(new Word('epos'), new Word('epen')); + + yield new Substitution(new Word('genius'), new Word('geniën')); + + yield new Substitution(new Word('aanbod'), new Word('aanbiedingen')); + + yield new Substitution(new Word('beleg'), new Word('belegeringen')); + + yield new Substitution(new Word('dank'), new Word('dankbetuigingen')); + + yield new Substitution(new Word('gedrag'), new Word('gedragingen')); + + yield new Substitution(new Word('genot'), new Word('genietingen')); + + yield new Substitution(new Word('lof'), new Word('lofbetuigingen')); + + // http://nl.wikipedia.org/wiki/Meervoud_(Nederlands)#Latijnse_meervoudsvormen + yield new 
Substitution(new Word('quaestrix'), new Word('quaestrices')); + + yield new Substitution(new Word('matrix'), new Word('matrices')); + } +} diff --git a/lib/Doctrine/Inflector/Rules/Dutch/InflectorFactory.php b/lib/Doctrine/Inflector/Rules/Dutch/InflectorFactory.php new file mode 100644 index 00000000..742233bc --- /dev/null +++ b/lib/Doctrine/Inflector/Rules/Dutch/InflectorFactory.php @@ -0,0 +1,21 @@ +getFlippedSubstitutions() + ); + } + + public static function getPluralRuleset(): Ruleset + { + return new Ruleset( + new Transformations(...Inflectible::getPlural()), + new Patterns(...Uninflected::getPlural()), + new Substitutions(...Inflectible::getIrregular()) + ); + } +} diff --git a/lib/Doctrine/Inflector/Rules/Dutch/Uninflected.php b/lib/Doctrine/Inflector/Rules/Dutch/Uninflected.php new file mode 100644 index 00000000..7c984057 --- /dev/null +++ b/lib/Doctrine/Inflector/Rules/Dutch/Uninflected.php @@ -0,0 +1,66 @@ + + */ + public static function getSingular(): iterable + { + yield from self::getDefault(); + + // http://nl.wikipedia.org/wiki/Plurale_tantum + yield new Pattern('hersenen'); + yield new Pattern('ingewanden'); + yield new Pattern('mazelen'); + yield new Pattern('pokken'); + yield new Pattern('waterpokken'); + yield new Pattern('financiën'); + yield new Pattern('activa'); + yield new Pattern('passiva'); + yield new Pattern('onkosten'); + yield new Pattern('kosten'); + yield new Pattern('bescheiden'); + yield new Pattern('paperassen'); + yield new Pattern('notulen'); + yield new Pattern('Roma'); + yield new Pattern('Sinti'); + yield new Pattern('Inuit'); + yield new Pattern('taliban'); + yield new Pattern('illuminati'); + yield new Pattern('aanstalten'); + yield new Pattern('hurken'); + yield new Pattern('lurven'); + yield new Pattern('luren'); + } + + /** + * @return iterable The shared default patterns followed by patterns for singulare tantum words. + */ + public static function getPlural(): iterable + { + yield from self::getDefault(); + + // http://nl.wikipedia.org/wiki/Singulare_tantum + yield new 
Pattern('letterkunde'); + yield new Pattern('muziek'); + yield new Pattern('heelal'); + yield new Pattern('vastgoed'); + yield new Pattern('have'); + yield new Pattern('nageslacht'); + } + + /** + * @return iterable Patterns yielded first by both getSingular() and getPlural(). + */ + private static function getDefault(): iterable + { + yield new Pattern('twitter'); + } +} diff --git a/tests/Doctrine/Tests/Inflector/InflectorFactoryTest.php b/tests/Doctrine/Tests/Inflector/InflectorFactoryTest.php index abb91792..bcebf44a 100644 --- a/tests/Doctrine/Tests/Inflector/InflectorFactoryTest.php +++ b/tests/Doctrine/Tests/Inflector/InflectorFactoryTest.php @@ -7,6 +7,7 @@ use Doctrine\Inflector\InflectorFactory; use Doctrine\Inflector\Language; use Doctrine\Inflector\LanguageInflectorFactory; +use Doctrine\Inflector\Rules\Dutch\InflectorFactory as DutchInflectorFactory; use Doctrine\Inflector\Rules\English\InflectorFactory as EnglishInflectorFactory; use Doctrine\Inflector\Rules\French\InflectorFactory as FrenchInflectorFactory; use Doctrine\Inflector\Rules\NorwegianBokmal\InflectorFactory as NorwegianBokmalInflectorFactory; @@ -38,6 +39,7 @@ public function testCreateForLanguageWithCustomLanguage(string $expectedClass, s */ public static function provideLanguages(): Generator { + yield 'Dutch' => [DutchInflectorFactory::class, Language::DUTCH]; yield 'English' => [EnglishInflectorFactory::class, Language::ENGLISH]; yield 'French' => [FrenchInflectorFactory::class, Language::FRENCH]; yield 'Norwegian Bokmal' => [NorwegianBokmalInflectorFactory::class, Language::NORWEGIAN_BOKMAL]; diff --git a/tests/Doctrine/Tests/Inflector/Rules/Dutch/DutchFunctionalTest.php b/tests/Doctrine/Tests/Inflector/Rules/Dutch/DutchFunctionalTest.php new file mode 100644 index 00000000..923f9307 --- /dev/null +++ b/tests/Doctrine/Tests/Inflector/Rules/Dutch/DutchFunctionalTest.php @@ -0,0 +1,70 @@ + + */ + public function dataSampleWords(): array + { + return [ + ['schip', 'schepen'], + ['idee', 'ideeën'], + ['stad', 'steden'], + ['gelid', 'gelederen'], + 
// @todo more words + ['weerman', 'weermannen'], + ['ei', 'eieren'], + ['rij', 'rijen'], + ['mogelijkheid', 'mogelijkheden'], + ['adres', 'adressen'], + ['olie', 'oliën'], + ['industrie', 'industrieën'], + ['lid', 'leden'], + ['smid', 'smeden'], + ['kalf', 'kalveren'], + ['lam', 'lammeren'], + ['koe', 'koeien'], + ['vlo', 'vlooien'], + ['leerrede', 'leerredenen'], + ['lende', 'lendenen'], + ['genius', 'geniën'], + ['aanbod', 'aanbiedingen'], + ['dank', 'dankbetuigingen'], + ['gedrag', 'gedragingen'], + ['genot', 'genietingen'], + ['lof', 'lofbetuigingen'], + ['quaestrix', 'quaestrices'], + ['matrix', 'matrices'], + // @todo: newly added, create exceptions or rules + // @todo: check next 5 words + // ['meer', 'meren'], + // ['baas', 'bazen'], + // ['oog', 'ogen'], + // ['as', 'assen'], + // ['kies', 'kiezen'], + ['twitter', 'twitter'], + // @todo: support an unordered list of plurals for words with several equally valid plural forms + ['epos', 'epen'], + // ['epos', 'epossen'], + // @todo: support an unordered list of plurals for words whose plural differs per meaning + ['beleg', 'belegeringen'], // @todo: meaning: invest (besiege) a city and then conquer it + // ['beleg', 'beleggen'], // @todo: meaning: call a meeting + // ['beleg', 'belegjes'], // @todo: meaning: the sliced food on a sandwich + ]; + } + + protected function createInflector(): Inflector + { + return InflectorFactory::createForLanguage(Language::DUTCH)->build(); + } +}