Source code for augmenty.doc.casing

import random
from functools import partial
from typing import Callable, Iterator

import spacy
from spacy.language import Language
from spacy.training import Example


def upper_casing_augmenter_v1(
    nlp: Language,
    example: Example,
    *,
    level: float,
) -> Iterator[Example]:  # type: ignore
    """Yield ``example`` either untouched or converted to upper case.

    A single random draw decides the outcome: when ``random.random()`` is
    greater than or equal to ``level`` the example is passed through as-is,
    otherwise an upper-cased copy is produced.

    Args:
        nlp: Pipeline whose ``make_doc`` is used to build the upper-cased doc.
        example: The training example to augment.
        level: Threshold compared against a draw from ``random.random()``;
            larger values make augmentation more likely.

    Yields:
        Exactly one example: the original or its upper-cased counterpart.
    """
    keep_original = random.random() >= level
    if keep_original:
        yield example
        return

    annotations = example.to_dict()
    upper_doc = nlp.make_doc(example.text.upper())
    # Keep the token-level surface forms in sync with the upper-cased text.
    upper_tokens = [token.text.upper() for token in example.reference]
    annotations["token_annotation"]["ORTH"] = upper_tokens
    yield example.from_dict(upper_doc, annotations)


@spacy.registry.augmenters("upper_case_v1")  # type: ignore
def create_upper_casing_augmenter_v1(
    level: float,
) -> Callable[[Language, Example], Iterator[Example]]:  # type: ignore
    """Build an augmenter that upper-cases whole documents.

    Args:
        level: Probability (between 0 and 1) that a given example is
            augmented; examples that are not selected pass through unchanged.

    Returns:
        ``upper_casing_augmenter_v1`` with ``level`` bound, callable as
        ``augmenter(nlp, example)``.

    Example:
        >>> import augmenty
        >>> import spacy
        >>> nlp = spacy.blank("en")
        >>> upper_case_augmenter = augmenty.load("upper_case_v1", level=1)
        >>> texts = ["A sample text"]
        >>> list(augmenty.texts(texts, upper_case_augmenter, nlp))
        ["A SAMPLE TEXT"]
    """
    augmenter = partial(upper_casing_augmenter_v1, level=level)
    return augmenter
def spongebob_augmenter_v1(
    nlp: Language,
    example: Example,
    *,
    level: float,
) -> Iterator[Example]:  # type: ignore
    """Yield ``example`` either untouched or re-cased in SpOnGeBoB style.

    Characters at even positions of ``example.text`` are upper-cased and
    characters at odd positions are lower-cased. When ``random.random()`` is
    greater than or equal to ``level`` the example is passed through as-is.

    Args:
        nlp: Pipeline whose ``make_doc`` is used to build the re-cased doc.
        example: The training example to augment.
        level: Threshold compared against a draw from ``random.random()``;
            larger values make augmentation more likely.

    Yields:
        Exactly one example: the original or its re-cased counterpart.
    """
    if random.random() >= level:
        yield example
        return

    # One entry per ORIGINAL character. An entry may hold more than one
    # character because case mapping can change length ("ß".upper() == "SS"),
    # so the joined text can be longer than ``example.text``.
    cased = [
        c.lower() if i % 2 else c.upper() for i, c in enumerate(example.text)
    ]
    example_dict = example.to_dict()
    doc = nlp.make_doc("".join(cased))
    # Token offsets (``t.idx``) refer to positions in the original text, so
    # slice the per-character list rather than the re-cased string; slicing
    # the string would shift every token after a length-changing character.
    # NOTE(review): assumes the reference text matches ``example.text``
    # character for character, as the original implementation did.
    example_dict["token_annotation"]["ORTH"] = [
        "".join(cased[t.idx : t.idx + len(t.text)]) for t in example.y
    ]
    yield example.from_dict(doc, example_dict)
@spacy.registry.augmenters("spongebob_v1")  # type: ignore
def create_spongebob_augmenter_v1(
    level: float,
) -> Callable[[Language, Example], Iterator[Example]]:  # type: ignore
    """Build an augmenter that rewrites documents in SpOnGeBoB casing.

    Args:
        level: Probability (between 0 and 1) that a given example is
            augmented; examples that are not selected pass through unchanged.

    Returns:
        ``spongebob_augmenter_v1`` with ``level`` bound, callable as
        ``augmenter(nlp, example)``.

    Example:
        >>> import augmenty
        >>> import spacy
        >>> nlp = spacy.blank("en")
        >>> spongebob_augmenter = augmenty.load("spongebob_v1", level=1)
        >>> texts = ["A sample text"]
        >>> list(augmenty.texts(texts, spongebob_augmenter, nlp))
        ["A SaMpLe tExT"]
    """
    augmenter = partial(spongebob_augmenter_v1, level=level)
    return augmenter