from typing import Callable, Iterator
import spacy
from spacy.language import Language
from spacy.training import Example
from ...character import create_char_replace_augmenter_v1
from ...token import create_conditional_token_casing_augmenter_v1
@spacy.registry.augmenters("da_æøå_replace_v1")  # type: ignore
def create_da_æøå_replace_augmenter_v1(
    level: float,
) -> Callable[[Language, Example], Iterator[Example]]:  # type: ignore
    """Create an augmenter that rewrites æ, ø and å as ae, oe and aa.

    Both lower-case and upper-case letters are covered; an upper-case
    letter maps to a capitalised digraph (e.g. "Å" -> "Aa").

    Args:
        level: probability to augment æ, ø or å.

    Returns:
        The augmenter built by ``create_char_replace_augmenter_v1`` from the
        replacement table below and ``level``.

    Example:
        >>> import augmenty
        >>> from spacy.lang.en import English
        >>> nlp = English()
        >>> augmenter = augmenty.load("da_æøå_replace_v1", level=0.1)
        >>> texts = ["æ ø Å"]
        >>> list(augmenty.texts(texts, augmenter, nlp))
        ["ae oe Aa"]

        Because ``level`` is a probability, the output above is only one
        possible result: it is what you get when all three characters
        happen to be replaced.
    """
    # Each key maps to a list of candidate replacements; every letter here
    # has exactly one spelling variant.
    replace_dict = {
        "æ": ["ae"],
        "ø": ["oe"],
        "å": ["aa"],
        "Æ": ["Ae"],
        "Ø": ["Oe"],
        "Å": ["Aa"],
    }
    return create_char_replace_augmenter_v1(replace=replace_dict, level=level)
@spacy.registry.augmenters("da_historical_noun_casing_v1")  # type: ignore
def create_da_historical_noun_casing_augmenter_v1(
    level: float,
) -> Callable[[Language, Example], Iterator[Example]]:  # type: ignore
    """Create an augmenter that upper-cases nouns.

    A token counts as a noun when its ``pos_`` attribute equals ``"NOUN"``,
    so the input documents need part-of-speech tags for this augmenter to
    have any effect.
    NOTE(review): the name suggests this mimics historical Danish
    orthography, and the exact casing applied is decided by
    ``create_conditional_token_casing_augmenter_v1`` -- confirm there.

    Args:
        level: The probability to upper case a noun.

    Returns:
        The augmenter.
    """

    def conditional(token) -> bool:
        """Return True when the token is tagged as a noun."""
        return token.pos_ == "NOUN"

    return create_conditional_token_casing_augmenter_v1(
        conditional=conditional,
        upper=True,
        level=level,
    )