From 5b3740091fca39458cd9fe5ae18effd9f3700188 Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Fri, 5 Jan 2024 12:35:52 +0100 Subject: [PATCH 1/3] Add Phi-2. --- spacy_llm/models/hf/__init__.py | 2 + spacy_llm/models/hf/phi2.py | 115 ++++++++++++++++++++++++++++ spacy_llm/tests/models/test_phi2.py | 85 ++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 spacy_llm/models/hf/phi2.py create mode 100644 spacy_llm/tests/models/test_phi2.py diff --git a/spacy_llm/models/hf/__init__.py b/spacy_llm/models/hf/__init__.py index b3afbb71..f7414101 100644 --- a/spacy_llm/models/hf/__init__.py +++ b/spacy_llm/models/hf/__init__.py @@ -4,6 +4,7 @@ from .llama2 import llama2_hf from .mistral import mistral_hf from .openllama import openllama_hf +from .phi2 import phi2_hf from .stablelm import stablelm_hf __all__ = [ @@ -13,5 +14,6 @@ "llama2_hf", "mistral_hf", "openllama_hf", + "phi2_hf", "stablelm_hf", ] diff --git a/spacy_llm/models/hf/phi2.py b/spacy_llm/models/hf/phi2.py new file mode 100644 index 00000000..be195347 --- /dev/null +++ b/spacy_llm/models/hf/phi2.py @@ -0,0 +1,115 @@ +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple + +from confection import SimpleFrozenDict + +from ...compat import Literal, transformers +from ...registry.util import registry +from .base import HuggingFace + + +class Phi2(HuggingFace): + MODEL_NAMES = Literal["phi-2"] # noqa: F722 + + def __init__( + self, + name: str, + config_init: Optional[Dict[str, Any]], + config_run: Optional[Dict[str, Any]], + context_length: Optional[int], + ): + self._tokenizer: Optional["transformers.AutoTokenizer"] = None + super().__init__( + name=name, + config_init=config_init, + config_run=config_run, + context_length=context_length, + ) + + def init_model(self) -> "transformers.AutoModelForCausalLM": + """Sets up HF model and needed utilities. + RETURNS (Any): HF model. + """ + # Initialize tokenizer and model. + self._tokenizer = transformers.AutoTokenizer.from_pretrained( + self._name, trust_remote_code=True + ) + init_cfg = self._config_init + device: Optional[str] = None + if "device" in init_cfg: + device = init_cfg.pop("device") + + model = transformers.AutoModelForCausalLM.from_pretrained( + self._name, **init_cfg + ) + if device: + model.to(device) + + return model + + def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override] + assert callable(self._tokenizer) + responses: List[List[str]] = [] + + for prompts_for_doc in prompts: + tokenized_input_ids = [ + self._tokenizer( + prompt, return_tensors="pt", return_attention_mask=False + ).input_ids + for prompt in prompts_for_doc + ] + tokenized_input_ids = [ + tii.to(self._model.device) for tii in tokenized_input_ids + ] + + assert hasattr(self._model, "generate") + responses.append( + [ + self._tokenizer.decode( + self._model.generate(input_ids=tii, **self._config_run)[ + :, tii.shape[1] : + ][0], + ) + for tii in tokenized_input_ids + ] + ) + + return responses + + @property + def hf_account(self) -> str: + return "microsoft" + + @staticmethod + def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]: + # See https://huggingface.co/microsoft/phi-2#sample-code for recommended setting combinations. 
+        default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
+        return (
+            {
+                **default_cfg_init,
+                "torch_dtype": "auto",
+                "device_map": "cuda",
+                "trust_remote_code": True,
+            },
+            {
+                **default_cfg_run,
+                "max_new_tokens": 200,
+            },
+        )
+
+
+@registry.llm_models("spacy.Phi-2.v1")
+def phi2_hf(
+    name: Phi2.MODEL_NAMES,
+    config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
+    config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
+) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
+    """Generates Phi-2 instance that can execute a set of prompts and return the raw responses.
+    name (Literal): Name of the Phi-2 model. Has to be one of Phi2.get_model_names().
+    config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
+    config_run (Optional[Dict[str, Any]]): HF config for running the model.
+    RETURNS (Callable[[Iterable[str]], Iterable[str]]): Phi-2 instance that can execute a set of prompts and return
+        the raw responses.
+    """
+    return Phi2(
+        name=name, config_init=config_init, config_run=config_run, context_length=2048
+    )
diff --git a/spacy_llm/tests/models/test_phi2.py b/spacy_llm/tests/models/test_phi2.py
new file mode 100644
index 00000000..771b0462
--- /dev/null
+++ b/spacy_llm/tests/models/test_phi2.py
@@ -0,0 +1,85 @@
+import copy
+
+import pytest
+import spacy
+from confection import Config  # type: ignore[import]
+from thinc.compat import has_torch_cuda_gpu
+
+from ...compat import torch
+
+_PIPE_CFG = {
+    "model": {
+        "@llm_models": "spacy.Phi-2.v1",
+        "name": "phi-2",
+    },
+    "task": {"@llm_tasks": "spacy.NoOp.v1"},
+    "save_io": True,
+}
+
+_NLP_CONFIG = """
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+save_io = True
+
+[components.llm.task]
+@llm_tasks = "spacy.NoOp.v1"
+
+[components.llm.model]
+@llm_models = "spacy.Phi-2.v1"
+name = "phi-2"
+"""
+
+
+@pytest.mark.gpu
+@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
+def test_init():
+    """Test initialization and simple run."""
+    nlp = spacy.blank("en")
+    nlp.add_pipe("llm", config=_PIPE_CFG)
+    doc = nlp("This is a test.")
+    torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"][0].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"][0]
+    )
+
+
+@pytest.mark.gpu
+@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
+def test_init_with_set_config():
+    """Test initialization and simple run with changed config."""
+    nlp = spacy.blank("en")
+    cfg = copy.deepcopy(_PIPE_CFG)
+    cfg["model"]["config_run"] = {"max_new_tokens": 32}
+    nlp.add_pipe("llm", config=cfg)
+    doc = nlp("This is a test.")
+    torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"][0].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"][0]
+    )
+
+
+@pytest.mark.gpu
+@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
+def test_init_from_config():
+    orig_config = Config().from_str(_NLP_CONFIG)
+    nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True)
+    assert nlp.pipe_names == ["llm"]
+    torch.cuda.empty_cache()
+
+
+@pytest.mark.gpu
+@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
+def test_invalid_model():
+    orig_config = Config().from_str(_NLP_CONFIG)
+    config = copy.deepcopy(orig_config)
+    config["components"]["llm"]["model"]["name"] = "anything-else"
+    with pytest.raises(ValueError, match="unexpected value; permitted"):
+        spacy.util.load_model_from_config(config, auto_fill=True)
+    torch.cuda.empty_cache()

From 592f3966c2a3474925e5f1f52019e86a9c9f8fda Mon Sep 17 00:00:00 2001
From: Raphael Mitsch
Date: Sat, 6 Jan 2024 11:20:28 +0100
Subject: [PATCH 2/3] Add Mixtral.

---
 spacy_llm/models/hf/__init__.py        |   2 +
 spacy_llm/models/hf/base.py            |   8 ++
 spacy_llm/models/hf/mistral.py         |   7 +-
 spacy_llm/models/hf/mixtral.py         | 108 +++++++++++++++++++++++++
 spacy_llm/tests/models/test_mixtral.py |  70 ++++++++++++++++
 5 files changed, 191 insertions(+), 4 deletions(-)
 create mode 100644 spacy_llm/models/hf/mixtral.py
 create mode 100644 spacy_llm/tests/models/test_mixtral.py

diff --git a/spacy_llm/models/hf/__init__.py b/spacy_llm/models/hf/__init__.py
index f7414101..bc268528 100644
--- a/spacy_llm/models/hf/__init__.py
+++ b/spacy_llm/models/hf/__init__.py
@@ -3,6 +3,7 @@
 from .falcon import falcon_hf
 from .llama2 import llama2_hf
 from .mistral import mistral_hf
+from .mixtral import mixtral_hf
 from .openllama import openllama_hf
 from .phi2 import phi2_hf
 from .stablelm import stablelm_hf
@@ -13,6 +14,7 @@
     "falcon_hf",
     "llama2_hf",
     "mistral_hf",
+    "mixtral_hf",
     "openllama_hf",
     "phi2_hf",
     "stablelm_hf",
diff --git a/spacy_llm/models/hf/base.py b/spacy_llm/models/hf/base.py
index b8f8b7b7..7232321c 100644
--- a/spacy_llm/models/hf/base.py
+++ b/spacy_llm/models/hf/base.py
@@ -69,6 +69,14 @@ def __init__(
                 f"Double-check you specified a valid dtype."
             ) from ex

+        # Recognize boolean attributes.
+        for key, value in self._config_init.items():
+            if value in ("True", "False"):
+                self._config_init[key] = False if value == "False" else True
+        for key, value in self._config_run.items():
+            if value in ("True", "False"):
+                self._config_run[key] = False if value == "False" else True
+
         # Init HF model.
         HuggingFace.check_installation()
         self._check_model()
diff --git a/spacy_llm/models/hf/mistral.py b/spacy_llm/models/hf/mistral.py
index 3c5039a2..53883110 100644
--- a/spacy_llm/models/hf/mistral.py
+++ b/spacy_llm/models/hf/mistral.py
@@ -65,7 +65,7 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:

             tokenized_input_ids = [
                 self._tokenizer(
-                    prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
+                    prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
                     return_tensors="pt",
                 ).input_ids
                 for prompt in prompts_for_doc
@@ -96,11 +96,10 @@ def mistral_hf(
     config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
 ) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
     """Generates Mistral instance that can execute a set of prompts and return the raw responses.
-    name (Literal): Name of the Falcon model. Has to be one of Falcon.get_model_names().
+    name (Literal): Name of the Mistral model. Has to be one of Mistral.get_model_names().
     config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
     config_run (Optional[Dict[str, Any]]): HF config for running the model.
-    RETURNS (Callable[[Iterable[str]], Iterable[str]]): Falcon instance that can execute a set of prompts and return
-        the raw responses.
+    RETURNS (Mistral): Mistral instance that can execute a set of prompts and return the raw responses.
""" return Mistral( name=name, config_init=config_init, config_run=config_run, context_length=8000 diff --git a/spacy_llm/models/hf/mixtral.py b/spacy_llm/models/hf/mixtral.py new file mode 100644 index 00000000..773c368d --- /dev/null +++ b/spacy_llm/models/hf/mixtral.py @@ -0,0 +1,108 @@ +from typing import Any, Callable, Dict, Iterable, List, Optional + +from confection import SimpleFrozenDict + +from ...compat import Literal, transformers +from ...registry.util import registry +from .base import HuggingFace + + +class Mixtral(HuggingFace): + MODEL_NAMES = Literal[ + "Mixtral-8x7B-v0.1", "Mixtral-8x7B-Instruct-v0.1" + ] # noqa: F722 + + def __init__( + self, + name: MODEL_NAMES, + config_init: Optional[Dict[str, Any]], + config_run: Optional[Dict[str, Any]], + context_length: Optional[int], + ): + self._tokenizer: Optional["transformers.AutoTokenizer"] = None + self._is_instruct = "instruct" in name + super().__init__( + name=name, + config_init=config_init, + config_run=config_run, + context_length=context_length, + ) + + assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase) + + # Instantiate GenerationConfig object from config dict. + self._hf_config_run = transformers.GenerationConfig.from_pretrained( + self._name, **self._config_run + ) + # To avoid deprecation warning regarding usage of `max_length`. + self._hf_config_run.max_new_tokens = self._hf_config_run.max_length + + def init_model(self) -> Any: + self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name) + init_cfg = self._config_init + device: Optional[str] = None + if "device" in init_cfg: + device = init_cfg.pop("device") + + model = transformers.AutoModelForCausalLM.from_pretrained( + self._name, **init_cfg, resume_download=True + ) + if device: + model.to(device) + + return model + + @property + def hf_account(self) -> str: + return "mistralai" + + def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override] + assert callable(self._tokenizer) + assert hasattr(self._model, "generate") + assert hasattr(self._tokenizer, "batch_decode") + responses: List[List[str]] = [] + + for prompts_for_doc in prompts: + prompts_for_doc = list(prompts_for_doc) + + tokenized_input_ids = [ + self._tokenizer( + prompt if not self._is_instruct else f"[INST] {prompt} [/INST]", + return_tensors="pt", + ).input_ids + for prompt in prompts_for_doc + ] + tokenized_input_ids = [ + tp.to(self._model.device) for tp in tokenized_input_ids + ] + + responses.append( + [ + self._tokenizer.decode( + self._model.generate( + input_ids=tok_ii, generation_config=self._hf_config_run + )[:, tok_ii.shape[1] :][0], + skip_special_tokens=True, + ) + for tok_ii in tokenized_input_ids + ] + ) + + return responses + + +@registry.llm_models("spacy.Mixtral.v1") +def mixtral_hf( + name: Mixtral.MODEL_NAMES, + config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(), + config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(), +) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]: + """Generates Mixtral instance that can execute a set of prompts and return the raw responses. + name (Literal): Name of the Mixtral model. Has to be one of Mixtral.get_model_names(). + config_init (Optional[Dict[str, Any]]): HF config for initializing the model. + config_run (Optional[Dict[str, Any]]): HF config for running the model. + RETURNS (Mixtral): Mixtral instance that can execute a set of prompts and return the raw responses. 
+ """ + return Mixtral( + name=name, config_init=config_init, config_run=config_run, context_length=8000 + ) diff --git a/spacy_llm/tests/models/test_mixtral.py b/spacy_llm/tests/models/test_mixtral.py new file mode 100644 index 00000000..0dafad96 --- /dev/null +++ b/spacy_llm/tests/models/test_mixtral.py @@ -0,0 +1,70 @@ +import copy + +import pytest +import spacy +from confection import Config # type: ignore[import] +from thinc.compat import has_torch_cuda_gpu + +from ...compat import torch + +_PIPE_CFG = { + "model": { + "@llm_models": "spacy.Mixtral.v1", + "name": "Mixtral-8x7B-Instruct-v0.1", + }, + "task": {"@llm_tasks": "spacy.NoOp.v1"}, +} + +_NLP_CONFIG = """ + +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.task] +@llm_tasks = "spacy.NoOp.v1" + +[components.llm.model] +@llm_models = "spacy.Mixtral.v1" +name = "Mixtral-8x7B-Instruct-v0.1.1" +""" + + +@pytest.mark.gpu +@pytest.mark.skip(reason="CI runner needs more GPU memory") +@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") +def test_init(): + """Test initialization and simple run.""" + nlp = spacy.blank("en") + cfg = copy.deepcopy(_PIPE_CFG) + nlp.add_pipe("llm", config=cfg) + nlp("This is a test.") + torch.cuda.empty_cache() + + +@pytest.mark.gpu +@pytest.mark.skip(reason="CI runner needs more GPU memory") +@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") +def test_init_from_config(): + orig_config = Config().from_str(_NLP_CONFIG) + nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True) + assert nlp.pipe_names == ["llm"] + torch.cuda.empty_cache() + + +@pytest.mark.gpu +@pytest.mark.skip(reason="CI runner needs more GPU memory") +@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") +def test_invalid_model(): + orig_config = Config().from_str(_NLP_CONFIG) + config = copy.deepcopy(orig_config) + config["components"]["llm"]["model"]["name"] = "x" + with pytest.raises(ValueError, match="unexpected value; permitted"): + spacy.util.load_model_from_config(config, auto_fill=True) + torch.cuda.empty_cache() From e4d7aa508556cf9b30971bfad388cc6c73fcdbd1 Mon Sep 17 00:00:00 2001 From: Raphael Mitsch Date: Wed, 7 Feb 2024 09:52:06 +0100 Subject: [PATCH 3/3] Remove Mixtral. 
--- spacy_llm/models/hf/__init__.py | 2 - spacy_llm/models/hf/mixtral.py | 108 ------------------------- spacy_llm/tests/models/test_mixtral.py | 70 ---------------- 3 files changed, 180 deletions(-) delete mode 100644 spacy_llm/models/hf/mixtral.py delete mode 100644 spacy_llm/tests/models/test_mixtral.py diff --git a/spacy_llm/models/hf/__init__.py b/spacy_llm/models/hf/__init__.py index bc268528..f7414101 100644 --- a/spacy_llm/models/hf/__init__.py +++ b/spacy_llm/models/hf/__init__.py @@ -3,7 +3,6 @@ from .falcon import falcon_hf from .llama2 import llama2_hf from .mistral import mistral_hf -from .mixtral import mixtral_hf from .openllama import openllama_hf from .phi2 import phi2_hf from .stablelm import stablelm_hf @@ -14,7 +13,6 @@ "falcon_hf", "llama2_hf", "mistral_hf", - "mixtral_hf", "openllama_hf", "phi2_hf", "stablelm_hf", diff --git a/spacy_llm/models/hf/mixtral.py b/spacy_llm/models/hf/mixtral.py deleted file mode 100644 index 773c368d..00000000 --- a/spacy_llm/models/hf/mixtral.py +++ /dev/null @@ -1,108 +0,0 @@ -from typing import Any, Callable, Dict, Iterable, List, Optional - -from confection import SimpleFrozenDict - -from ...compat import Literal, transformers -from ...registry.util import registry -from .base import HuggingFace - - -class Mixtral(HuggingFace): - MODEL_NAMES = Literal[ - "Mixtral-8x7B-v0.1", "Mixtral-8x7B-Instruct-v0.1" - ] # noqa: F722 - - def __init__( - self, - name: MODEL_NAMES, - config_init: Optional[Dict[str, Any]], - config_run: Optional[Dict[str, Any]], - context_length: Optional[int], - ): - self._tokenizer: Optional["transformers.AutoTokenizer"] = None - self._is_instruct = "instruct" in name - super().__init__( - name=name, - config_init=config_init, - config_run=config_run, - context_length=context_length, - ) - - assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase) - - # Instantiate GenerationConfig object from config dict. - self._hf_config_run = transformers.GenerationConfig.from_pretrained( - self._name, **self._config_run - ) - # To avoid deprecation warning regarding usage of `max_length`. 
- self._hf_config_run.max_new_tokens = self._hf_config_run.max_length - - def init_model(self) -> Any: - self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name) - init_cfg = self._config_init - device: Optional[str] = None - if "device" in init_cfg: - device = init_cfg.pop("device") - - model = transformers.AutoModelForCausalLM.from_pretrained( - self._name, **init_cfg, resume_download=True - ) - if device: - model.to(device) - - return model - - @property - def hf_account(self) -> str: - return "mistralai" - - def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override] - assert callable(self._tokenizer) - assert hasattr(self._model, "generate") - assert hasattr(self._tokenizer, "batch_decode") - responses: List[List[str]] = [] - - for prompts_for_doc in prompts: - prompts_for_doc = list(prompts_for_doc) - - tokenized_input_ids = [ - self._tokenizer( - prompt if not self._is_instruct else f"[INST] {prompt} [/INST]", - return_tensors="pt", - ).input_ids - for prompt in prompts_for_doc - ] - tokenized_input_ids = [ - tp.to(self._model.device) for tp in tokenized_input_ids - ] - - responses.append( - [ - self._tokenizer.decode( - self._model.generate( - input_ids=tok_ii, generation_config=self._hf_config_run - )[:, tok_ii.shape[1] :][0], - skip_special_tokens=True, - ) - for tok_ii in tokenized_input_ids - ] - ) - - return responses - - -@registry.llm_models("spacy.Mixtral.v1") -def mixtral_hf( - name: Mixtral.MODEL_NAMES, - config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(), - config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(), -) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]: - """Generates Mixtral instance that can execute a set of prompts and return the raw responses. - name (Literal): Name of the Mixtral model. Has to be one of Mixtral.get_model_names(). - config_init (Optional[Dict[str, Any]]): HF config for initializing the model. - config_run (Optional[Dict[str, Any]]): HF config for running the model. - RETURNS (Mixtral): Mixtral instance that can execute a set of prompts and return the raw responses. 
- """ - return Mixtral( - name=name, config_init=config_init, config_run=config_run, context_length=8000 - ) diff --git a/spacy_llm/tests/models/test_mixtral.py b/spacy_llm/tests/models/test_mixtral.py deleted file mode 100644 index 0dafad96..00000000 --- a/spacy_llm/tests/models/test_mixtral.py +++ /dev/null @@ -1,70 +0,0 @@ -import copy - -import pytest -import spacy -from confection import Config # type: ignore[import] -from thinc.compat import has_torch_cuda_gpu - -from ...compat import torch - -_PIPE_CFG = { - "model": { - "@llm_models": "spacy.Mixtral.v1", - "name": "Mixtral-8x7B-Instruct-v0.1", - }, - "task": {"@llm_tasks": "spacy.NoOp.v1"}, -} - -_NLP_CONFIG = """ - -[nlp] -lang = "en" -pipeline = ["llm"] -batch_size = 128 - -[components] - -[components.llm] -factory = "llm" - -[components.llm.task] -@llm_tasks = "spacy.NoOp.v1" - -[components.llm.model] -@llm_models = "spacy.Mixtral.v1" -name = "Mixtral-8x7B-Instruct-v0.1.1" -""" - - -@pytest.mark.gpu -@pytest.mark.skip(reason="CI runner needs more GPU memory") -@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") -def test_init(): - """Test initialization and simple run.""" - nlp = spacy.blank("en") - cfg = copy.deepcopy(_PIPE_CFG) - nlp.add_pipe("llm", config=cfg) - nlp("This is a test.") - torch.cuda.empty_cache() - - -@pytest.mark.gpu -@pytest.mark.skip(reason="CI runner needs more GPU memory") -@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") -def test_init_from_config(): - orig_config = Config().from_str(_NLP_CONFIG) - nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True) - assert nlp.pipe_names == ["llm"] - torch.cuda.empty_cache() - - -@pytest.mark.gpu -@pytest.mark.skip(reason="CI runner needs more GPU memory") -@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") -def test_invalid_model(): - orig_config = Config().from_str(_NLP_CONFIG) - config = copy.deepcopy(orig_config) - config["components"]["llm"]["model"]["name"] = "x" - with pytest.raises(ValueError, match="unexpected value; permitted"): - spacy.util.load_model_from_config(config, auto_fill=True) - torch.cuda.empty_cache()
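
Usage note (a minimal sketch, not part of the patches above): the spacy.Phi-2.v1 model registered in PATCH 1/3 can be wired into a spacy-llm pipeline as shown below. The snippet is assembled from the test configuration in test_phi2.py; the NoOp task and the max_new_tokens override are illustrative choices, and a CUDA-capable GPU is assumed because the default config_init sets device_map to "cuda".

    import spacy

    # Build a blank English pipeline and add the llm component backed by Phi-2.
    nlp = spacy.blank("en")
    nlp.add_pipe(
        "llm",
        config={
            "model": {
                "@llm_models": "spacy.Phi-2.v1",
                "name": "phi-2",
                # config_run is forwarded to the HF generate() call; this
                # overrides the default max_new_tokens of 200.
                "config_run": {"max_new_tokens": 64},
            },
            "task": {"@llm_tasks": "spacy.NoOp.v1"},
            "save_io": True,
        },
    )

    doc = nlp("This is a test.")
    # With save_io=True, the raw prompt and response are stored on the doc.
    print(doc.user_data["llm_io"]["llm"]["response"][0])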