Skip to content

scandeval.model_loading

docs module scandeval.model_loading

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""Functions related to the loading of models."""

import typing as t

from .benchmark_modules import (
    FreshEncoderModel,
    HuggingFaceEncoderModel,
    LiteLLMModel,
    VLLMModel,
)
from .constants import GENERATIVE_DATASET_TASK_GROUPS
from .enums import InferenceBackend, ModelType
from .exceptions import InvalidBenchmark, InvalidModel

if t.TYPE_CHECKING:
    from .benchmark_modules import BenchmarkModule
    from .data_models import BenchmarkConfig, DatasetConfig, ModelConfig


def load_model(
    model_config: "ModelConfig",
    dataset_config: "DatasetConfig",
    benchmark_config: "BenchmarkConfig",
) -> "BenchmarkModule":
    """Instantiate the benchmark module that fits a model configuration.

    The concrete class is picked from the combination of the model type, the
    inference backend and whether the model is freshly initialised. Once a class
    has been picked, the model/task pairing is validated and the class is
    instantiated with the three configurations.

    Args:
        model_config:
            The model configuration.
        dataset_config:
            The dataset configuration.
        benchmark_config:
            The benchmark configuration.

    Returns:
        The instantiated benchmark module.

    Raises:
        InvalidModel:
            If no benchmark module exists for the combination of model type,
            inference backend and freshness found in the model configuration.
        InvalidBenchmark:
            If a non-generative model is paired with a dataset whose task group
            is one of the generative task groups.
    """
    kind = model_config.model_type
    backend = model_config.inference_backend
    fresh = model_config.fresh

    # `fresh` is compared by identity on purpose: only the actual booleans select
    # a branch, anything else ends up in the generic error further down.
    model_class: t.Optional[t.Type["BenchmarkModule"]] = None
    if fresh is False:
        # Pretrained models, checked from the most to the least common setup.
        if kind == ModelType.GENERATIVE and backend == InferenceBackend.VLLM:
            model_class = VLLMModel
        elif kind == ModelType.ENCODER and backend == InferenceBackend.TRANSFORMERS:
            model_class = HuggingFaceEncoderModel
        elif kind == ModelType.GENERATIVE and backend == InferenceBackend.LITELLM:
            model_class = LiteLLMModel
    elif fresh is True:
        # Fresh models are only available as encoders on the transformers backend.
        if kind == ModelType.ENCODER and backend == InferenceBackend.TRANSFORMERS:
            model_class = FreshEncoderModel
        else:
            raise InvalidModel(
                "Cannot load a freshly initialised model with the model type "
                f"{kind!r} and inference backend "
                f"{backend!r}."
            )

    if model_class is None:
        raise InvalidModel(
            f"Cannot load model with model type {kind!r} and "
            f"inference backend {backend!r}."
        )

    # Generative tasks can only be solved by generative models, so reject any other
    # pairing before the (potentially expensive) model is constructed.
    task = dataset_config.task
    if task.task_group in GENERATIVE_DATASET_TASK_GROUPS:
        if not kind == ModelType.GENERATIVE:
            raise InvalidBenchmark(
                f"Cannot benchmark non-generative model {model_config.model_id!r} on "
                f"generative task {task.name!r}."
            )

    return model_class(
        model_config=model_config,
        dataset_config=dataset_config,
        benchmark_config=benchmark_config,
    )