# Source code for augmenty.character.swap

"""Augmenters for swapping characters."""

import random
from functools import partial
from typing import Callable, Iterator

import spacy
from spacy.language import Language
from spacy.training import Example

from augmenty.util import Augmenter

from ..augment_utilities import make_text_from_orth


def char_swap_augmenter_v1(
    nlp: Language, example: Example, level: float
) -> Iterator[Example]:
    """Yield a copy of ``example`` in which tokens may have two adjacent
    characters swapped.

    For every token of ``example.reference`` the character positions that have
    a right-hand neighbour are visited left to right. At each position a fresh
    ``random.random()`` draw is compared against ``level``; on the first draw
    below ``level`` the character is swapped with its neighbour and the token
    is finished, so a token receives at most one swap. Tokens of length 0 or 1
    have no such position and are returned unchanged.

    Args:
        nlp: pipeline whose ``make_doc`` builds the Doc for the augmented text.
        example: the example to augment.
        level: threshold the random draw must fall below for a swap to happen.

    Yields:
        A single example created with ``from_dict`` from the new Doc and the
        annotation dict whose ``ORTH`` entries hold the (possibly swapped)
        token texts.
    """

    def _swap_first_hit(token) -> str:
        original = token.text
        last_swappable = len(original) - 1  # final char has no right neighbour
        for pos in range(last_swappable):
            if random.random() < level:
                left, right = original[pos], original[pos + 1]
                return original[:pos] + right + left + original[pos + 2 :]
        return original

    annotations = example.to_dict()

    swapped_orth = []
    for token in example.reference:
        swapped_orth.append(_swap_first_hit(token))
    annotations["token_annotation"]["ORTH"] = swapped_orth

    # The raw text is rebuilt from the annotation dict by the project helper
    # so that it reflects the modified ORTH values.
    augmented_text = make_text_from_orth(annotations)
    augmented_doc = nlp.make_doc(augmented_text)
    yield example.from_dict(augmented_doc, annotations)


@spacy.registry.augmenters("char_swap_v1")  # type: ignore
def create_char_swap_augmenter_v1(
    level: float,
) -> Augmenter:
    """Creates an augmenter that swaps two neighbouring characters in a token
    with a given probability.

    The returned callable is ``char_swap_augmenter_v1`` with ``level`` bound,
    registered in spaCy's augmenter registry under the name ``"char_swap_v1"``.

    Args:
        level: probability of swapping a character with its right-hand
            neighbour. It is checked position by position within a token, and
            a token receives at most one swap.

    Returns:
        The augmenter.

    Example:
        The augmentation is random, so the output below is one possible result.

        >>> import augmenty
        >>> from spacy.lang.en import English
        >>> nlp = English()
        >>> char_swap_augmenter = augmenty.load("char_swap_v1", level=0.1)
        >>> texts = ["A sample text"]
        >>> list(augmenty.texts(texts, char_swap_augmenter, nlp))
        ["A smaple txet"]
    """
    return partial(char_swap_augmenter_v1, level=level)