API

General

General functions for working with the tasks and models implemented in SEB.

seb.registries.get_task(name)

Fetches a task by name.

Parameters:

    name (str): The name of the task. Required.

Returns:

    Task: A task.

Source code in seb/registries.py
def get_task(name: str) -> Task:
    """
    Fetches a task by name.

    Args:
        name: The name of the task.

    Returns:
        A task.
    """
    return tasks.get(name)()

seb.registries.get_all_tasks()

Returns all tasks implemented in SEB.

Returns:

    list[seb.interfaces.task.Task]: A list of all tasks in SEB.

Source code in seb/registries.py
def get_all_tasks() -> list[Task]:
    """
    Returns all tasks implemented in SEB.

    Returns:
        A list of all tasks in SEB.
    """
    return [get_task(task_name) for task_name in tasks.get_all()]
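
For illustration, a minimal sketch of how the task registry might be used. The task name passed to get_task below is illustrative; list the registry first to find the exact registered names.

from seb.registries import get_all_tasks, get_task

# Print every registered task along with its type and languages.
for task in get_all_tasks():
    print(task.name, task.task_type, task.languages)

# Fetch a single task by its registered name (the name is illustrative).
lcc = get_task("LCC")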

seb.registries.get_model(name)

Fetches a model by name.

Parameters:

    name (str): The name of the model. Required.

Returns:

    SebModel: A model including metadata.

Source code in seb/registries.py
def get_model(name: str) -> SebModel:
    """
    Fetches a model by name.

    Args:
        name: The name of the model.

    Returns:
        A model including metadata.
    """
    return models.get(name)()

seb.registries.get_all_models()

Get all the models implemented in SEB.

Returns:

    list[seb.interfaces.model.SebModel]: A list of all models in SEB.

Source code in seb/registries.py
def get_all_models() -> list[SebModel]:
    """
    Get all the models implemented in SEB.

    Returns:
        A list of all models in SEB.
    """
    return [get_model(model_name) for model_name in models.get_all()]
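
Analogously, a sketch for the model registry. The model name below is a placeholder; use get_all_models() to see which names are registered.

from seb.registries import get_all_models, get_model

# Print the name of every registered model.
for model in get_all_models():
    print(model.meta.name)

# Fetch a single model by name (placeholder name).
model = get_model("sentence-transformers/all-MiniLM-L6-v2")
print(model.meta.name)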

Benchmark

seb.benchmark.Benchmark

Benchmark is the main orchestrator of the SEB benchmark.

Source code in seb/benchmark.py
class Benchmark:
    """
    Benchmark is the main orchestrator of the SEB benchmark.
    """

    def __init__(
        self,
        languages: Optional[list[str]] = None,
        tasks: Optional[Union[Iterable[str], Iterable[Task]]] = None,
    ) -> None:
        """
        Initialize the benchmark.

        Args:
            languages: A list of languages to run the benchmark on. If None, all languages are used.
            tasks: The tasks to run the benchmark on. If None, all tasks are used. Can either be specified as strings or as Task objects.
        """
        self.languages = languages

        self.tasks = self.get_tasks(tasks, languages)
        self.task_names = [task.name for task in self.tasks]

    @staticmethod
    def get_tasks(
        tasks: Optional[Union[Iterable[str], Iterable[Task]]],
        languages: Optional[list[str]],
    ) -> list[Task]:
        """
        Get the tasks for the benchmark.

        Returns:
            A list of tasks.
        """
        _tasks = []

        if tasks is None:
            _tasks = get_all_tasks()
        else:
            for task in tasks:
                if isinstance(task, str):
                    _tasks.append(get_task(task))
                elif isinstance(task, Task):
                    _tasks.append(task)
                else:
                    raise ValueError(f"Invalid task type: {type(task)}")

        if languages is not None:
            langs = set(languages)
            _tasks = [task for task in _tasks if set(task.languages) & langs]

        return _tasks

    def evaluate_model(
        self,
        model: SebModel,
        use_cache: bool = True,
        run_model: bool = True,
        raise_errors: bool = True,
        cache_dir: Optional[Path] = None,
        verbose: bool = True,
    ) -> BenchmarkResults:
        """
        Evaluate a model on the benchmark.

        Args:
            model: The model to evaluate.
            use_cache: Whether to use the cache.
            run_model: Whether to run the model if the cache is not present.
            raise_errors: Whether to raise errors.
            cache_dir: The cache directory to use. If None, the default cache directory is used.
            verbose: Whether to show a progress bar.

        Returns:
            The results of the benchmark.
        """
        task_results = []
        pbar = tqdm(
            self.tasks,
            position=1,
            desc=f"Running {model.meta.name}",
            leave=False,
            disable=not verbose,
        )
        for task in pbar:
            pbar.set_description(f"Running {model.meta.name} on {task.name}")
            task_result = run_task(
                task,
                model,
                use_cache=use_cache,
                run_model=run_model,
                raise_errors=raise_errors,
                cache_dir=cache_dir,
            )
            task_results.append(task_result)

        return BenchmarkResults(meta=model.meta, task_results=task_results)

    def evaluate_models(
        self,
        models: list[SebModel],
        use_cache: bool = True,
        run_model: bool = True,
        raise_errors: bool = True,
        cache_dir: Optional[Path] = None,
        verbose: bool = True,
    ) -> list[BenchmarkResults]:
        """
        Evaluate a list of models on the benchmark.

        Args:
            models: The models to evaluate.
            use_cache: Whether to use the cache.
            run_model: Whether to run the model if the cache is not present.
            raise_errors: Whether to raise errors.
            cache_dir: The cache directory to use. If None, the default cache directory is used.
            verbose: Whether to show a progress bar.

        Returns:
            The results of the benchmark, once for each model.
        """
        results = []
        pbar = tqdm(
            models,
            position=0,
            desc="Running Benchmark",
            leave=True,
            disable=not verbose,
        )

        for model in pbar:
            pbar.set_description(f"Running {model.meta.name}")
            results.append(
                self.evaluate_model(
                    model,
                    use_cache=use_cache,
                    run_model=run_model,
                    raise_errors=raise_errors,
                    cache_dir=cache_dir,
                    verbose=verbose,
                ),
            )
        return results
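
A minimal end-to-end sketch, assuming the model name below is registered in SEB (check with seb.registries.get_all_models()):

from seb.benchmark import Benchmark
from seb.registries import get_model

# Restrict the benchmark to Danish tasks; pass tasks=... to restrict further.
benchmark = Benchmark(languages=["da"])

model = get_model("sentence-transformers/all-MiniLM-L6-v2")  # placeholder name
results = benchmark.evaluate_model(model)

print(results.meta.name, results.get_main_score())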

__init__(self, languages=None, tasks=None) special

Initialize the benchmark.

Parameters:

    languages (Optional[list[str]]): A list of languages to run the benchmark on. If None, all languages are used. Default: None.
    tasks (Optional[Union[collections.abc.Iterable[str], collections.abc.Iterable[seb.interfaces.task.Task]]]): The tasks to run the benchmark on. If None, all tasks are used. Can be specified either as strings or as Task objects. Default: None.
Source code in seb/benchmark.py
def __init__(
    self,
    languages: Optional[list[str]] = None,
    tasks: Optional[Union[Iterable[str], Iterable[Task]]] = None,
) -> None:
    """
    Initialize the benchmark.

    Args:
        languages: A list of languages to run the benchmark on. If None, all languages are used.
        tasks: The tasks to run the benchmark on. If None, all tasks are used. Can either be specified as strings or as Task objects.
    """
    self.languages = languages

    self.tasks = self.get_tasks(tasks, languages)
    self.task_names = [task.name for task in self.tasks]

evaluate_model(self, model, use_cache=True, run_model=True, raise_errors=True, cache_dir=None, verbose=True)

Evaluate a model on the benchmark.

Parameters:

    model (SebModel): The model to evaluate. Required.
    use_cache (bool): Whether to use the cache. Default: True.
    run_model (bool): Whether to run the model if the cache is not present. Default: True.
    raise_errors (bool): Whether to raise errors. Default: True.
    cache_dir (Optional[pathlib.Path]): The cache directory to use. If None, the default cache directory is used. Default: None.
    verbose (bool): Whether to show a progress bar. Default: True.

Returns:

    BenchmarkResults: The results of the benchmark.

Source code in seb/benchmark.py
def evaluate_model(
    self,
    model: SebModel,
    use_cache: bool = True,
    run_model: bool = True,
    raise_errors: bool = True,
    cache_dir: Optional[Path] = None,
    verbose: bool = True,
) -> BenchmarkResults:
    """
    Evaluate a model on the benchmark.

    Args:
        model: The model to evaluate.
        use_cache: Whether to use the cache.
        run_model: Whether to run the model if the cache is not present.
        raise_errors: Whether to raise errors.
        cache_dir: The cache directory to use. If None, the default cache directory is used.
        verbose: Whether to show a progress bar.

    Returns:
        The results of the benchmark.
    """
    task_results = []
    pbar = tqdm(
        self.tasks,
        position=1,
        desc=f"Running {model.meta.name}",
        leave=False,
        disable=not verbose,
    )
    for task in pbar:
        pbar.set_description(f"Running {model.meta.name} on {task.name}")
        task_result = run_task(
            task,
            model,
            use_cache=use_cache,
            run_model=run_model,
            raise_errors=raise_errors,
            cache_dir=cache_dir,
        )
        task_results.append(task_result)

    return BenchmarkResults(meta=model.meta, task_results=task_results)
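
For instance, a sketch that caches results in a custom directory and suppresses task failures instead of raising. The model name and cache path are arbitrary, and the failure handling is inferred from the result types documented below.

from pathlib import Path

from seb.benchmark import Benchmark
from seb.registries import get_model

benchmark = Benchmark(languages=["da"])
model = get_model("sentence-transformers/all-MiniLM-L6-v2")  # placeholder name

results = benchmark.evaluate_model(
    model,
    use_cache=True,               # reuse cached task results if present
    raise_errors=False,           # failed tasks are presumably recorded as TaskError entries
    cache_dir=Path("seb_cache"),  # arbitrary cache location
)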

evaluate_models(self, models, use_cache=True, run_model=True, raise_errors=True, cache_dir=None, verbose=True)

Evaluate a list of models on the benchmark.

Parameters:

    models (list[seb.interfaces.model.SebModel]): The models to evaluate. Required.
    use_cache (bool): Whether to use the cache. Default: True.
    run_model (bool): Whether to run the model if the cache is not present. Default: True.
    raise_errors (bool): Whether to raise errors. Default: True.
    cache_dir (Optional[pathlib.Path]): The cache directory to use. If None, the default cache directory is used. Default: None.
    verbose (bool): Whether to show a progress bar. Default: True.

Returns:

    list[seb.result_dataclasses.BenchmarkResults]: The results of the benchmark, one for each model.

Source code in seb/benchmark.py
def evaluate_models(
    self,
    models: list[SebModel],
    use_cache: bool = True,
    run_model: bool = True,
    raise_errors: bool = True,
    cache_dir: Optional[Path] = None,
    verbose: bool = True,
) -> list[BenchmarkResults]:
    """
    Evaluate a list of models on the benchmark.

    Args:
        models: The models to evaluate.
        use_cache: Whether to use the cache.
        run_model: Whether to run the model if the cache is not present.
        raise_errors: Whether to raise errors.
        cache_dir: The cache directory to use. If None, the default cache directory is used.
        verbose: Whether to show a progress bar.

    Returns:
        The results of the benchmark, once for each model.
    """
    results = []
    pbar = tqdm(
        models,
        position=0,
        desc="Running Benchmark",
        leave=True,
        disable=not verbose,
    )

    for model in pbar:
        pbar.set_description(f"Running {model.meta.name}")
        results.append(
            self.evaluate_model(
                model,
                use_cache=use_cache,
                run_model=run_model,
                raise_errors=raise_errors,
                cache_dir=cache_dir,
                verbose=verbose,
            ),
        )
    return results
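
And a sketch for evaluating several models in one call; the model names are placeholders.

from seb.benchmark import Benchmark
from seb.registries import get_model

benchmark = Benchmark(languages=["da", "sv"])

# Placeholder names; use seb.registries.get_all_models() to list valid ones.
models = [get_model("model-a"), get_model("model-b")]

all_results = benchmark.evaluate_models(models)
for res in all_results:
    print(res.meta.name, res.get_main_score())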

get_tasks(tasks, languages) staticmethod

Get the tasks for the benchmark.

Returns:

    list[seb.interfaces.task.Task]: A list of tasks.

Source code in seb/benchmark.py
@staticmethod
def get_tasks(
    tasks: Optional[Union[Iterable[str], Iterable[Task]]],
    languages: Optional[list[str]],
) -> list[Task]:
    """
    Get the tasks for the benchmark.

    Returns:
        A list of tasks.
    """
    _tasks = []

    if tasks is None:
        _tasks = get_all_tasks()
    else:
        for task in tasks:
            if isinstance(task, str):
                _tasks.append(get_task(task))
            elif isinstance(task, Task):
                _tasks.append(task)
            else:
                raise ValueError(f"Invalid task type: {type(task)}")

    if languages is not None:
        langs = set(languages)
        _tasks = [task for task in _tasks if set(task.languages) & langs]

    return _tasks
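
For example, a sketch of mixing registered task names with already-constructed Task objects and filtering by language; the task names are illustrative.

from seb.benchmark import Benchmark
from seb.registries import get_task

# Task names here are illustrative; get_all_tasks() lists the registered names.
tasks = Benchmark.get_tasks(["ScaLA", get_task("LCC")], languages=["da"])
print([t.name for t in tasks])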

Interfaces

SEB implements two main interfaces: a task interface, which represents a task within the benchmark, and a model interface, which represents a model applied to those tasks.

Model Interface

seb.interfaces.model.Encoder (Protocol)

Interface which all models must implement.

Source code in seb/interfaces/model.py
@runtime_checkable
class Encoder(Protocol):
    """
    Interface which all models must implement.
    """

    def encode(
        self,
        sentences: list[str],
        *,
        task: Optional["Task"] = None,
        batch_size: int = 32,
        **kwargs: Any,
    ) -> np.ndarray:
        """Returns a list of embeddings for the given sentences.

        Args:
            sentences: List of sentences to encode
            task: The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need
                to be used.
            batch_size: Batch size for the encoding
            kwargs: arguments to pass to the models encode method

        Returns:
            Embeddings for the given documents
        """
        ...

    # The following methods are optional and can be implemented if the model supports them.
    # def to(self, device: torch.device):
    #     ...

    # def encode_queries(self, queries: list[str], **kwargs: Any) -> np.ndarray:
    #     ...

    # def encode_corpus(self, corpus: list[dict[str, str]], **kwargs: Any) -> np.ndarray:
    #     ...
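
As a sketch of a conforming implementation: any class with a compatible encode method satisfies the protocol, no subclassing required. Here the class wraps a sentence-transformers model purely as an example; the model name is a placeholder.

from typing import Any, Optional

import numpy as np
from sentence_transformers import SentenceTransformer

from seb.interfaces.task import Task

class MyEncoder:
    """Satisfies the Encoder protocol structurally; no inheritance needed."""

    def __init__(self) -> None:
        # Placeholder model name; any sentence-transformers model works the same way.
        self.model = SentenceTransformer("all-MiniLM-L6-v2")

    def encode(
        self,
        sentences: list[str],
        *,
        task: Optional[Task] = None,  # task metadata is provided but unused here
        batch_size: int = 32,
        **kwargs: Any,
    ) -> np.ndarray:
        return self.model.encode(sentences, batch_size=batch_size, **kwargs)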

encode(self, sentences, *, task=None, batch_size=32, **kwargs)

Returns a list of embeddings for the given sentences.

Parameters:

    sentences (list[str]): List of sentences to encode. Required.
    task (Optional[Task]): The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need to be used. Default: None.
    batch_size (int): Batch size for the encoding. Default: 32.
    **kwargs (Any): Arguments to pass to the model's encode method.

Returns:

    np.ndarray: Embeddings for the given documents.

Source code in seb/interfaces/model.py
def encode(
    self,
    sentences: list[str],
    *,
    task: Optional["Task"] = None,
    batch_size: int = 32,
    **kwargs: Any,
) -> np.ndarray:
    """Returns a list of embeddings for the given sentences.

    Args:
        sentences: List of sentences to encode
        task: The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need
            to be used.
        batch_size: Batch size for the encoding
        kwargs: arguments to pass to the models encode method

    Returns:
        Embeddings for the given documents
    """
    ...

seb.interfaces.model.LazyLoadEncoder (Encoder) dataclass

Encoder object, which lazy loads the model on the first call to encode()

Source code in seb/interfaces/model.py
@dataclass
class LazyLoadEncoder(Encoder):
    """Encoder object, which lazy loads the model on the first call to encode()"""

    loader: Callable[[], Encoder]
    _model: Optional[Encoder] = None

    def load_model(self):
        """
        Load the model.
        """
        if self._model is None:
            self._model = self.loader()

    def to(self, device: torch.device):
        self.load_model()
        try:
            self._model = self._model.to(device)  # type: ignore
        except AttributeError:
            logging.debug(f"Model {self._model} does not have a to method")

    @property
    def model(self) -> Encoder:
        """
        Dynamically load the model.
        """
        self.load_model()
        return self._model  # type: ignore

    def encode(
        self,
        sentences: list[str],
        *,
        task: Optional["Task"] = None,
        **kwargs: Any,
    ) -> np.ndarray:
        """
        Returns a list of embeddings for the given sentences.
        Args:
            sentences: List of sentences to encode
            task: The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need
                to be used.
            batch_size: Batch size for the encoding
            kwargs: arguments to pass to the models encode method

        Returns:
            Embeddings for the given documents
        """
        return self.model.encode(sentences, task=task, **kwargs)

    def encode_queries(self, queries: list[str], **kwargs: Any) -> np.ndarray:
        try:
            return self.model.encode_queries(queries, **kwargs)  # type: ignore
        except AttributeError:
            return self.encode(queries, **kwargs)

    def encode_corpus(self, corpus: list[dict[str, str]], **kwargs: Any) -> np.ndarray:
        try:
            return self.model.encode_corpus(corpus, **kwargs)  # type: ignore
        except AttributeError:
            sep = " "
            if isinstance(corpus, dict):
                sentences = [
                    (corpus["title"][i] + sep + corpus["text"][i]).strip() if "title" in corpus else corpus["text"][i].strip()  # type: ignore
                    for i in range(len(corpus["text"]))  # type: ignore
                ]
            else:
                sentences = [(doc["title"] + sep + doc["text"]).strip() if "title" in doc else doc["text"].strip() for doc in corpus]
            return self.encode(sentences, **kwargs)
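
For example, a sketch of deferring a heavyweight load until the first encode call. The toy encoder below is a stand-in defined only for illustration; in practice the loader would construct a real Encoder implementation.

from typing import Any, Optional

import numpy as np

from seb.interfaces.model import LazyLoadEncoder
from seb.interfaces.task import Task

class _ToyEncoder:
    """A stand-in encoder used purely to illustrate lazy loading."""

    def encode(self, sentences: list[str], *, task: Optional[Task] = None, **kwargs: Any) -> np.ndarray:
        # Pretend embedding: one 8-dimensional random vector per sentence.
        return np.random.default_rng(0).normal(size=(len(sentences), 8))

# The loader is only invoked on the first call that needs the model.
encoder = LazyLoadEncoder(loader=_ToyEncoder)

embeddings = encoder.encode(["Hej verden", "Hallå världen"])  # triggers the load
print(embeddings.shape)  # (2, 8)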

model: Encoder property readonly

Dynamically load the model.

__init__(self, loader, _model=None) special

Initialize self. See help(type(self)) for accurate signature.

encode(self, sentences, *, task=None, **kwargs)

Returns a list of embeddings for the given sentences.

Parameters:

    sentences (list[str]): List of sentences to encode. Required.
    task (Optional[Task]): The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need to be used. Default: None.
    **kwargs (Any): Arguments to pass to the model's encode method, e.g. batch_size.

Returns:

    np.ndarray: Embeddings for the given documents.

Source code in seb/interfaces/model.py
def encode(
    self,
    sentences: list[str],
    *,
    task: Optional["Task"] = None,
    **kwargs: Any,
) -> np.ndarray:
    """
    Returns a list of embeddings for the given sentences.
    Args:
        sentences: List of sentences to encode
        task: The task to encode for. This allows the model to encode differently for different tasks. Will always be given but does not need
            to be used.
        batch_size: Batch size for the encoding
        kwargs: arguments to pass to the models encode method

    Returns:
        Embeddings for the given documents
    """
    return self.model.encode(sentences, task=task, **kwargs)

load_model(self)

Load the model.

Source code in seb/interfaces/model.py
def load_model(self):
    """
    Load the model.
    """
    if self._model is None:
        self._model = self.loader()

seb.interfaces.model.SebModel dataclass

An embedding model as implemented in SEB. It notably dynamically loads models (such that models are not loaded when a cache is hit) and includes metadata pertaining to the specific model.

Source code in seb/interfaces/model.py
@dataclass
class SebModel:
    """
    An embedding model as implemented in SEB. It notably dynamically loads models (such that models are not loaded when a cache is hit)
    and includes metadata pertaining to the specific model.
    """

    meta: ModelMeta
    encoder: Encoder

    @property
    def number_of_parameters(self) -> Optional[int]:
        """
        Returns the number of parameters in the model.
        """
        if hasattr(self.encoder, "parameters"):
            return sum(p.numel() for p in self.encoder.parameters() if p.requires_grad)  # type: ignore
        return None

number_of_parameters: Optional[int] property readonly

Returns the number of parameters in the model.
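
For illustration, a sketch that inspects a registered model's metadata and parameter count; the model name is a placeholder.

from seb.registries import get_model

model = get_model("sentence-transformers/all-MiniLM-L6-v2")  # placeholder name
print(model.meta.name)
print(model.number_of_parameters)  # None if the underlying encoder does not expose parameters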

Task Interface

seb.interfaces.task.Task (Protocol)

A task is a specific evaluation task for a sentence embedding model.

Attributes:

    name (str): The name of the task.
    main_score (str): The main score of the task.
    reference (str): A reference to the task.
    version (str): The version of the task.
    languages (list[Literal['da', 'nb', 'nn', 'sv', 'da-bornholm', 'is', 'fo', 'en']]): The languages of the task.
    domain (list[Literal['social', 'poetry', 'wiki', 'fiction', 'non-fiction', 'web', 'legal', 'news', 'academic', 'spoken', 'reviews', 'blog', 'medical', 'government', 'bible']]): The domains of the task. Should be one of the categories listed on https://universaldependencies.org.
    task_type (Literal['Classification', 'Retrieval', 'STS', 'BitextMining', 'Clustering', 'Speed']): The task type, which determines how the task is evaluated, e.g. Classification.
    task_subtypes (list[str]): A list of subtypes, e.g. Sentiment Classification.
    description (str): A description of the task.

Source code in seb/interfaces/task.py
@runtime_checkable
class Task(Protocol):
    """
    A task is a specific evaluation task for a sentence embedding model.

    Attributes:
        name: The name of the task.
        main_score: The main score of the task.
        reference: A reference to the task.
        version: The version of the task.
        languages: The languages of the task.
        domain: The domains of the task. Should be one of the categories listed on https://universaldependencies.org
        task_type: A list of task types, determines how the task is being evaluated. E.g. Classification.
        task_subtypes: a list of subtypes e.g. Sentiment Classification.
        description: A description of the task.
    """

    name: str
    main_score: str
    reference: str
    version: str
    languages: list[Language]
    domain: list[Domain]
    task_type: TaskType
    task_subtypes: list[str]
    description: str

    def evaluate(self, model: Encoder) -> TaskResult:
        """
        Evaluates a Sentence Embedding Model on the task.

        Args:
            model: A model with the encode method implemented.

        Returns:
            A TaskResult object.
        """
        ...

    def get_documents(self) -> list[str]:
        """
        Get the documents for the task.

        Returns:
            A list of strings.
        """
        ...

    def get_descriptive_stats(self) -> DescriptiveDatasetStats:
        texts = self.get_documents()
        document_lengths = np.array([len(text) for text in texts])

        mean = float(np.mean(document_lengths))
        std = float(np.std(document_lengths))
        return DescriptiveDatasetStats(
            mean_document_length=mean,
            std_document_length=std,
            num_documents=len(document_lengths),
        )

    def name_to_path(self) -> str:
        """
        Convert a name to a path.
        """
        name = self.name.replace("/", "__").replace(" ", "_")
        return name
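
Because Task is a runtime-checkable Protocol, any object with the right attributes and methods counts as a task. A short sketch using a registered task; the task name is illustrative.

from seb.interfaces.task import Task
from seb.registries import get_task

task = get_task("LCC")  # illustrative name; see get_all_tasks() for valid names
assert isinstance(task, Task)  # structural check via the runtime-checkable Protocol

print(task.name, task.task_type, task.languages)
print(task.get_descriptive_stats())  # mean/std document length and document count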

evaluate(self, model)

Evaluates a Sentence Embedding Model on the task.

Parameters:

    model (Encoder): A model with the encode method implemented. Required.

Returns:

    TaskResult: A TaskResult object.

Source code in seb/interfaces/task.py
def evaluate(self, model: Encoder) -> TaskResult:
    """
    Evaluates a Sentence Embedding Model on the task.

    Args:
        model: A model with the encode method implemented.

    Returns:
        A TaskResult object.
    """
    ...

get_documents(self)

Get the documents for the task.

Returns:

    list[str]: A list of strings.

Source code in seb/interfaces/task.py
def get_documents(self) -> list[str]:
    """
    Get the documents for the task.

    Returns:
        A list of strings.
    """
    ...

name_to_path(self)

Convert a name to a path.

Source code in seb/interfaces/task.py
def name_to_path(self) -> str:
    """
    Convert a name to a path.
    """
    name = self.name.replace("/", "__").replace(" ", "_")
    return name

Data Classes

SEB uses data classes to store the results of a benchmark. The following classes are available:

seb.result_dataclasses.BenchmarkResults (BaseModel)

Dataclass for storing benchmark results.

Attributes:

    meta (ModelMeta): ModelMeta object.
    task_results (list[Union[seb.result_dataclasses.TaskResult, seb.result_dataclasses.TaskError]]): A list of TaskResult (or TaskError) objects.

Source code in seb/result_dataclasses.py
class BenchmarkResults(BaseModel):
    """
    Dataclass for storing benchmark results.

    Attributes:
        meta: ModelMeta object.
        task_results: List of TaskResult objects.
    """

    meta: ModelMeta
    task_results: list[Union[TaskResult, TaskError]]

    def get_main_score(self, lang: Optional[Iterable[Language]] = None) -> float:
        scores = [t.get_main_score(lang) for t in self.task_results]
        if scores:
            return sum(scores) / len(scores)
        return np.nan

    def __iter__(self) -> Iterator[Union[TaskResult, TaskError]]:  # type: ignore
        return iter(self.task_results)

    def __getitem__(self, index: int) -> Union[TaskResult, TaskError]:
        return self.task_results[index]

    def __len__(self) -> int:
        return len(self.task_results)

    def to_disk(self, path: Path) -> None:
        """
        Write task results to a path.
        """
        if path.is_file():
            raise ValueError("Can't save BenchmarkResults to a file. Path must be a directory.")
        path.mkdir(parents=True, exist_ok=True)
        for task_result in self.task_results:
            if isinstance(task_result, TaskResult):
                task_result.to_disk(path / f"{task_result.task_name}.json")
            else:
                task_result.to_disk(path / f"{task_result.task_name}.error.json")

        meta_path = path / "meta.json"
        self.meta.to_disk(meta_path)

    @classmethod
    def from_disk(cls, path: Path) -> "BenchmarkResults":
        """
        Load task results from a path.
        """
        if not path.is_dir():
            raise ValueError("Can't load BenchmarkResults from path: {path}. Path must be a directory.")
        task_results = []
        for file in path.glob("*.json"):
            if file.stem == "meta":
                continue
            if file.stem.endswith(".error"):
                task_results.append(TaskError.from_disk(file))
            else:
                task_results.append(TaskResult.from_disk(file))

        meta_path = path / "meta.json"
        meta = ModelMeta.from_disk(meta_path)
        return cls(meta=meta, task_results=task_results)
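
A sketch of persisting and reloading results; the model name and output directory are arbitrary.

from pathlib import Path

from seb.benchmark import Benchmark
from seb.registries import get_model
from seb.result_dataclasses import BenchmarkResults

benchmark = Benchmark(languages=["da"])
results = benchmark.evaluate_model(get_model("model-name"))  # placeholder model name

out_dir = Path("results/model-name")  # arbitrary directory; one JSON file per task plus meta.json
results.to_disk(out_dir)

reloaded = BenchmarkResults.from_disk(out_dir)
print(len(reloaded), reloaded.get_main_score())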

from_disk(path) classmethod

Load task results from a path.

Source code in seb/result_dataclasses.py
@classmethod
def from_disk(cls, path: Path) -> "BenchmarkResults":
    """
    Load task results from a path.
    """
    if not path.is_dir():
        raise ValueError("Can't load BenchmarkResults from path: {path}. Path must be a directory.")
    task_results = []
    for file in path.glob("*.json"):
        if file.stem == "meta":
            continue
        if file.stem.endswith(".error"):
            task_results.append(TaskError.from_disk(file))
        else:
            task_results.append(TaskResult.from_disk(file))

    meta_path = path / "meta.json"
    meta = ModelMeta.from_disk(meta_path)
    return cls(meta=meta, task_results=task_results)

to_disk(self, path)

Write task results to a path.

Source code in seb/result_dataclasses.py
def to_disk(self, path: Path) -> None:
    """
    Write task results to a path.
    """
    if path.is_file():
        raise ValueError("Can't save BenchmarkResults to a file. Path must be a directory.")
    path.mkdir(parents=True, exist_ok=True)
    for task_result in self.task_results:
        if isinstance(task_result, TaskResult):
            task_result.to_disk(path / f"{task_result.task_name}.json")
        else:
            task_result.to_disk(path / f"{task_result.task_name}.error.json")

    meta_path = path / "meta.json"
    self.meta.to_disk(meta_path)

seb.result_dataclasses.TaskResult (BaseModel)

Dataclass for storing task results.

Attributes:

    task_name (str): Name of the task.
    task_description (str): Description of the task.
    task_version (str): Version of the task.
    time_of_run (datetime): Time of the run.
    scores (dict[Literal['da', 'nb', 'nn', 'sv', 'da-bornholm', 'is', 'fo', 'en'], dict[str, Union[float, str]]]): Dictionary of scores of the form {language: {"metric": value}}.
    main_score (str): Name of the main score.

Source code in seb/result_dataclasses.py
class TaskResult(BaseModel):
    """
    Dataclass for storing task results.

    Attributes:
        task_name: Name of the task.
        task_description: Description of the task.
        task_version: Version of the task.
        time_of_run: Time of the run.
        scores: Dictionary of scores on the form {language: {"metric": value}}.
        main_score: Name of the main score.
    """

    task_name: str
    task_description: str
    task_version: str
    time_of_run: datetime
    scores: dict[Language, dict[str, Union[float, str]]]  # {language: {"metric": value}}.
    main_score: str

    def get_main_score(self, lang: Optional[Iterable[str]] = None) -> float:
        """
        Returns the main score for a given set of languages.

        Args:
            lang: List of languages to get the main score for.

        Returns:
            The main score.
        """
        main_scores = []
        if lang is None:
            lang = self.scores.keys()

        for l in lang:
            main_scores.append(self.scores[l][self.main_score])  # type: ignore

        return sum(main_scores) / len(main_scores)

    @property
    def languages(self) -> list[Language]:
        """
        Returns the languages of the task.
        """
        return list(self.scores.keys())

    @classmethod
    def from_disk(cls, path: Path) -> "TaskResult":
        """
        Load task results from a path.
        """
        with path.open("r") as f:
            task_results = json.load(f)
        return cls(**task_results)

    def to_disk(self, path: Path) -> None:
        """
        Write task results to a path.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        json_str: str = self.model_dump_json()  # type: ignore

        with path.open("w") as f:
            f.write(json_str)

    def name_to_path(self) -> str:
        """
        Convert a name to a path.
        """
        name = self.task_name.replace("/", "__").replace(" ", "_")
        return name
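
For example, a sketch of reading per-language scores from a TaskResult loaded from disk; the file path is arbitrary.

from pathlib import Path

from seb.result_dataclasses import TaskResult

result = TaskResult.from_disk(Path("results/my-model/LCC.json"))  # arbitrary path

print(result.task_name, result.main_score)
print(result.languages)               # languages covered by the task
print(result.get_main_score(["da"]))  # mean main score over Danish only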

languages: list[Literal['da', 'nb', 'nn', 'sv', 'da-bornholm', 'is', 'fo', 'en']] property readonly

Returns the languages of the task.

from_disk(path) classmethod

Load task results from a path.

Source code in seb/result_dataclasses.py
@classmethod
def from_disk(cls, path: Path) -> "TaskResult":
    """
    Load task results from a path.
    """
    with path.open("r") as f:
        task_results = json.load(f)
    return cls(**task_results)

get_main_score(self, lang=None)

Returns the main score for a given set of languages.

Parameters:

    lang (Optional[collections.abc.Iterable[str]]): List of languages to get the main score for. Default: None.

Returns:

    float: The main score.

Source code in seb/result_dataclasses.py
def get_main_score(self, lang: Optional[Iterable[str]] = None) -> float:
    """
    Returns the main score for a given set of languages.

    Args:
        lang: List of languages to get the main score for.

    Returns:
        The main score.
    """
    main_scores = []
    if lang is None:
        lang = self.scores.keys()

    for l in lang:
        main_scores.append(self.scores[l][self.main_score])  # type: ignore

    return sum(main_scores) / len(main_scores)

name_to_path(self)

Convert a name to a path.

Source code in seb/result_dataclasses.py
def name_to_path(self) -> str:
    """
    Convert a name to a path.
    """
    name = self.task_name.replace("/", "__").replace(" ", "_")
    return name

to_disk(self, path)

Write task results to a path.

Source code in seb/result_dataclasses.py
def to_disk(self, path: Path) -> None:
    """
    Write task results to a path.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    json_str: str = self.model_dump_json()  # type: ignore

    with path.open("w") as f:
        f.write(json_str)

seb.result_dataclasses.TaskError (BaseModel)

Dataclass for storing the error raised when a task fails to run, together with the task name and time of the run.

Source code in seb/result_dataclasses.py
class TaskError(BaseModel):
    task_name: str
    error: str
    time_of_run: datetime
    languages: list[str] = []

    def to_disk(self, path: Path) -> None:
        """
        Write task results to a path.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        json_str: str = self.model_dump_json()  # type: ignore

        with path.open("w") as f:
            f.write(json_str)

    @classmethod
    def from_disk(cls, path: Path) -> "TaskError":
        """
        Load task results from a path.
        """
        with path.open() as f:
            task_results = json.load(f)
        return cls(**task_results)

    @staticmethod
    def get_main_score(lang: Optional[Iterable[str]] = None) -> float:  # noqa: ARG004
        return np.nan

    def name_to_path(self) -> str:
        """
        Convert a name to a path.
        """
        name = self.task_name.replace("/", "__").replace(" ", "_")
        return name
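
When a benchmark is run with raise_errors=False, failed tasks presumably end up as TaskError entries next to the TaskResult entries in BenchmarkResults.task_results. A small sketch for separating the two; the summarize helper is hypothetical.

from typing import Union

from seb.result_dataclasses import TaskError, TaskResult

def summarize(task_results: list[Union[TaskResult, TaskError]]) -> None:
    """Print a one-line summary per task, separating failures from successes."""
    for r in task_results:
        if isinstance(r, TaskError):
            print(f"FAILED {r.task_name}: {r.error}")
        else:
            print(f"{r.task_name}: {r.get_main_score():.3f}")

# Typical use: summarize(results.task_results), where `results` is a
# BenchmarkResults produced with raise_errors=False.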

from_disk(path) classmethod

Load task results from a path.

Source code in seb/result_dataclasses.py
@classmethod
def from_disk(cls, path: Path) -> "TaskError":
    """
    Load task results from a path.
    """
    with path.open() as f:
        task_results = json.load(f)
    return cls(**task_results)

name_to_path(self)

Convert a name to a path.

Source code in seb/result_dataclasses.py
def name_to_path(self) -> str:
    """
    Convert a name to a path.
    """
    name = self.task_name.replace("/", "__").replace(" ", "_")
    return name

to_disk(self, path)

Write task results to a path.

Source code in seb/result_dataclasses.py
def to_disk(self, path: Path) -> None:
    """
    Write task results to a path.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    json_str: str = self.model_dump_json()  # type: ignore

    with path.open("w") as f:
        f.write(json_str)