
Commit

Resolve merge conflicts.
rmitsch committed Nov 14, 2023
2 parents e508499 + bbab211 commit 3218541
Showing 27 changed files with 473 additions and 192 deletions.
6 changes: 4 additions & 2 deletions requirements-dev.txt
@@ -7,8 +7,10 @@ mypy>=0.990,<1.1.0; platform_machine != "aarch64" and python_version >= "3.7"
black==22.3.0
types-requests==2.28.11.16
# Prompting libraries needed for testing
langchain==0.0.302; python_version>="3.9"
openai>=0.27; python_version>="3.9"
langchain==0.0.331; python_version>="3.9"
# Workaround for LangChain bug: pin OpenAI version. To be removed after LangChain has been fixed - see
# https://github.com/langchain-ai/langchain/issues/12967.
openai>=0.27,<=0.28.1; python_version>="3.9"

# Necessary for running all local models on GPU.
transformers[sentencepiece]>=4.0.0
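The new pin mirrors the upstream incompatibility: the pinned LangChain release still targets the pre-1.0 `openai` SDK, so `openai` is capped at 0.28.1 until the linked issue is resolved. Below is a hedged sketch of a runtime guard enforcing the same bound; the helper name is illustrative and not part of spacy-llm.

```python
# Hedged sketch (not part of spacy-llm): fail fast if the installed openai
# package is newer than the bound pinned above, since the pinned LangChain
# release still targets the pre-1.0 openai SDK.
from importlib.metadata import version

from packaging.version import Version


def assert_openai_pin(max_version: str = "0.28.1") -> None:
    installed = Version(version("openai"))
    if installed > Version(max_version):
        raise RuntimeError(
            f"openai {installed} is installed, but openai<={max_version} is required "
            "until https://github.com/langchain-ai/langchain/issues/12967 is fixed."
        )


assert_openai_pin()
```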
4 changes: 2 additions & 2 deletions setup.cfg
@@ -1,5 +1,5 @@
[metadata]
version = 0.6.2
version = 0.6.3
description = Integrating LLMs into structured NLP pipelines
author = Explosion
author_email = [email protected]
@@ -44,7 +44,7 @@ spacy_misc =

[options.extras_require]
langchain =
langchain==0.0.249
langchain==0.0.335
transformers =
torch>=1.13.1,<2.0
transformers>=4.28.1,<5.0
39 changes: 37 additions & 2 deletions spacy_llm/models/hf/base.py
@@ -27,12 +27,46 @@ def __init__(
inference_config (Dict[Any, Any]): HF config for model run.
"""
self._name = name if self.hf_account in name else f"{self.hf_account}/{name}"
self._config_init, self._config_run = self.compile_default_configs()
default_cfg_init, default_cfg_run = self.compile_default_configs()
self._config_init, self._config_run = default_cfg_init, default_cfg_run

if config_init:
self._config_init = {**self._config_init, **config_init}
if config_run:
self._config_run = {**self._config_run, **config_run}

# `device` and `device_map` are conflicting arguments - ensure they aren't both set.
if config_init:
# Case 1: both device and device_map explicitly set by user.
if "device" in config_init and "device_map" in config_init:
warnings.warn(
"`device` and `device_map` are conflicting arguments - don't set both. Dropping argument "
"`device`."
)
self._config_init.pop("device")
# Case 2: we have a CUDA GPU (and hence device="cuda:0" by default), but device_map is set by user.
elif "device" in default_cfg_init and "device_map" in config_init:
self._config_init.pop("device")
# Case 3: we don't have a CUDA GPU (and hence "device_map=auto" by default), but device is set by user.
elif "device_map" in default_cfg_init and "device" in config_init:
self._config_init.pop("device_map")

# Fetch proper torch.dtype, if specified.
if (
has_torch
and "torch_dtype" in self._config_init
and self._config_init["torch_dtype"] != "auto"
):
try:
self._config_init["torch_dtype"] = getattr(
torch, self._config_init["torch_dtype"]
)
except AttributeError as ex:
raise ValueError(
f"Invalid value {self._config_init['torch_dtype']} was specified for `torch_dtype`. "
f"Double-check you specified a valid dtype."
) from ex

# Init HF model.
HuggingFace.check_installation()
self._check_model()
@@ -96,7 +130,7 @@ def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
default_cfg_run: Dict[str, Any] = {}

if has_torch:
default_cfg_init["torch_dtype"] = torch.bfloat16
default_cfg_init["torch_dtype"] = "bfloat16"
if has_torch_cuda_gpu:
# this ensures it fails explicitly when GPU is not enabled or sufficient
default_cfg_init["device"] = "cuda:0"
@@ -113,6 +147,7 @@ "Install CUDA to load and run the LLM on the GPU, or install 'accelerate' to dynamically "
"Install CUDA to load and run the LLM on the GPU, or install 'accelerate' to dynamically "
"distribute the LLM on the CPU or even the hard disk. The latter may be slow."
)

return default_cfg_init, default_cfg_run

@abc.abstractmethod
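Taken together, these base-class changes mean (a) conflicting `device`/`device_map` settings are reconciled instead of being passed through to `transformers`, and (b) `torch_dtype` now defaults to the string `"bfloat16"` and is resolved to the corresponding `torch` attribute at init time, which keeps the value serialisable in a spaCy config. A hedged usage sketch follows; the registry and model names are assumed to match what spacy-llm exposes around this release, not verified API.

```python
# Hedged usage sketch: pass torch_dtype as a string and let the HF base class
# resolve it; prefer device_map when 'accelerate' should place the model.
# Assumes spacy, spacy-llm, torch, transformers and accelerate are installed.
import spacy

nlp = spacy.blank("en")
nlp.add_pipe(
    "llm",
    config={
        "task": {"@llm_tasks": "spacy.NER.v3", "labels": ["PERSON", "ORG"]},
        "model": {
            "@llm_models": "spacy.Mistral.v1",  # assumed registry name
            "name": "Mistral-7B-v0.1",
            # Resolved to torch.bfloat16 by HuggingFace.__init__; setting both
            # "device" and "device_map" here would warn and drop "device".
            "config_init": {"torch_dtype": "bfloat16", "device_map": "auto"},
        },
    },
)
doc = nlp("Tim Cook runs Apple.")
```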
1 change: 0 additions & 1 deletion spacy_llm/models/hf/falcon.py
@@ -19,7 +19,6 @@ def __init__(
config_run: Optional[Dict[str, Any]],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._device: Optional[str] = None
super().__init__(name=name, config_init=config_init, config_run=config_run)

assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)
25 changes: 8 additions & 17 deletions spacy_llm/models/hf/mistral.py
@@ -1,4 +1,4 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
from typing import Any, Callable, Dict, Iterable, List, Optional

from confection import SimpleFrozenDict

@@ -17,7 +17,6 @@ def __init__(
config_run: Optional[Dict[str, Any]],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._device: Optional[str] = None
self._is_instruct = "instruct" in name
super().__init__(name=name, config_init=config_init, config_run=config_run)

@@ -33,14 +32,15 @@ def __init__(
def init_model(self) -> Any:
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
self._device = init_cfg.pop("device")
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg, resume_download=True
)
if self._device:
model.to(self._device)
if device:
model.to(device)

return model

@@ -64,10 +64,9 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:
).input_ids
for prompt in prompts_for_doc
]
if self._device:
tokenized_input_ids = [
tp.to(self._device) for tp in tokenized_input_ids
]
tokenized_input_ids = [
tp.to(self._model.device) for tp in tokenized_input_ids
]

responses.append(
[
@@ -83,14 +82,6 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:

return responses

@staticmethod
def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
return (
default_cfg_init,
default_cfg_run,
)

@property
def context_length(self) -> int:
return 8000
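The same refactor recurs in openllama.py and stablelm.py below: `device` becomes a local variable consumed once in `init_model`, the redundant `compile_default_configs` override is dropped, and tokenized prompts are moved to `self._model.device` unconditionally, so the code also works when `device_map` (via `accelerate`) placed the model rather than an explicit `.to(device)` call. A minimal standalone sketch of that pattern, with an illustrative model name:

```python
# Minimal sketch, assuming transformers, torch and accelerate are installed.
# Inputs follow the device transformers actually chose for the model instead
# of a device string remembered on the wrapper object.
import torch
import transformers

name = "mistralai/Mistral-7B-v0.1"  # illustrative; any causal LM works
tokenizer = transformers.AutoTokenizer.from_pretrained(name)
model = transformers.AutoModelForCausalLM.from_pretrained(name, device_map="auto")

input_ids = tokenizer("Hello, world", return_tensors="pt").input_ids
input_ids = input_ids.to(model.device)  # reflects device_map placement
with torch.no_grad():
    out = model.generate(input_ids, max_new_tokens=8)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```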
21 changes: 10 additions & 11 deletions spacy_llm/models/hf/openllama.py
@@ -2,7 +2,7 @@

from confection import SimpleFrozenDict

from ...compat import Literal, torch, transformers
from ...compat import Literal, transformers
from ...registry.util import registry
from .base import HuggingFace

@@ -22,7 +22,6 @@ def __init__(
config_run: Optional[Dict[str, Any]],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._device: Optional[str] = None
super().__init__(name=name, config_init=config_init, config_run=config_run)

def init_model(self) -> "transformers.AutoModelForCausalLM":
@@ -32,14 +31,15 @@ def init_model(self) -> "transformers.AutoModelForCausalLM":
# Initialize tokenizer and model.
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
self._device = init_cfg.pop("device")
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg
)

if self._device:
model.to(self._device)
if device:
model.to(device)

return model

@@ -52,10 +52,9 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:
self._tokenizer(prompt, return_tensors="pt").input_ids
for prompt in prompts_for_doc
]
if self._device:
tokenized_input_ids = [
tii.to(self._device) for tii in tokenized_input_ids
]
tokenized_input_ids = [
tii.to(self._model.device) for tii in tokenized_input_ids
]

assert hasattr(self._model, "generate")
responses.append(
@@ -81,7 +80,7 @@ def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
return (
{
**default_cfg_init,
"torch_dtype": torch.float16,
"torch_dtype": "float16",
},
{**default_cfg_run, "max_new_tokens": 32},
)
17 changes: 8 additions & 9 deletions spacy_llm/models/hf/stablelm.py
@@ -42,7 +42,6 @@ def __init__(
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._is_tuned = "tuned" in name
self._device: Optional[str] = None
super().__init__(name=name, config_init=config_init, config_run=config_run)

def init_model(self) -> "transformers.AutoModelForCausalLM":
@@ -51,14 +50,15 @@ def init_model(self) -> "transformers.AutoModelForCausalLM":
"""
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
self._device = init_cfg.pop("device")
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg
)

if self._device:
model.half().to(self._device)
if device:
model.half().to(device)

return model

@@ -83,10 +83,9 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:
]
)
]
if self._device:
tokenized_input_ids = [
tp.to(self._device) for tp in tokenized_input_ids
]
tokenized_input_ids = [
tp.to(self._model.device) for tp in tokenized_input_ids
]

assert hasattr(self._model, "generate")
responses.append(
2 changes: 1 addition & 1 deletion spacy_llm/models/langchain/model.py
@@ -43,7 +43,7 @@ def get_type_to_cls_dict() -> Dict[
"""Returns langchain.llms.type_to_cls_dict.
RETURNS (Dict[str, Type[langchain.base_language.BaseLanguageModel]]): langchain.llms.type_to_cls_dict.
"""
return langchain.llms.type_to_cls_dict
return getattr(langchain.llms, "type_to_cls_dict")

def __call__(self, prompts: Iterable[Iterable[Any]]) -> Iterable[Iterable[Any]]:
"""Executes prompts on specified API.
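The only functional change in this file swaps a plain attribute reference for `getattr`, presumably to keep static analysis and newer LangChain releases happy: recent versions resolve many `langchain.llms` names lazily through a module-level `__getattr__`, while the runtime lookup stays the same. A hedged sketch of the same defensive access, assuming the LangChain version pinned in setup.cfg:

```python
# Hedged sketch: fetch the provider-name -> LLM-class mapping without a direct
# attribute reference. Assumes langchain==0.0.335 as pinned above; behaviour
# of other versions is not verified here.
import langchain.llms

type_to_cls_dict = getattr(langchain.llms, "type_to_cls_dict")
# Keys are provider identifiers, values are BaseLanguageModel subclasses.
print(sorted(type_to_cls_dict.keys())[:5])
```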
18 changes: 2 additions & 16 deletions spacy_llm/models/rest/base.py
@@ -1,7 +1,7 @@
import abc
import time
from enum import Enum
from typing import Any, Callable, Dict, Iterable, Optional, Tuple
from typing import Any, Callable, Dict, Iterable, Optional

import requests # type: ignore
from requests import ConnectTimeout, ReadTimeout
@@ -61,30 +61,15 @@ def __init__(
assert self._interval > 0
assert self._max_request_time > 0

self._check_model()
self._verify_auth()

def _check_model(self) -> None:
"""Checks whether model is supported. Raises if it isn't."""
if self._name not in self.get_model_names():
raise ValueError(
f"Model '{self._name}' is not supported - select one of {self.get_model_names()} instead"
)

@abc.abstractmethod
def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:
"""Executes prompts on specified API.
prompts (Iterable[Iterable[str]]): Prompts to execute.
RETURNS (Iterable[Iterable[str]]): API responses.
"""

@classmethod
def get_model_names(cls) -> Tuple[str, ...]:
"""Names of supported models.
RETURNS (Tuple[str]): Names of supported models.
"""
return tuple(cls._get_context_lengths().keys())

@staticmethod
@abc.abstractmethod
def _get_context_lengths() -> Dict[str, int]:
@@ -97,6 +82,7 @@ def context_length(self) -> int:
"""Returns context length in number of tokens for this model.
RETURNS (int): Max. number of tokens allowed in the prompt for the current model.
"""
# todo if context length not available in dict: accept param, otherwise fail?
return self._get_context_lengths()[self._name]

@property
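With `_check_model` and `get_model_names` removed, REST-backed models no longer validate the model name against a fixed whitelist at construction time; the first hard failure for an unknown name is now the dictionary lookup in `context_length`, which the TODO above flags. A hedged sketch of the resulting behaviour; the table entries are placeholders, not spacy-llm's actual values:

```python
# Hedged sketch of the context-length lookup after the whitelist removal.
# The names/limits below are illustrative, not spacy-llm's real tables.
from typing import Dict

_CONTEXT_LENGTHS: Dict[str, int] = {
    "gpt-3.5-turbo": 4097,
    "gpt-4": 8192,
}


def context_length(name: str) -> int:
    try:
        return _CONTEXT_LENGTHS[name]
    except KeyError:
        # One way to resolve the TODO above: fail loudly for unknown names,
        # or accept an explicit override from the caller.
        raise ValueError(f"No context length registered for model '{name}'.")


print(context_length("gpt-4"))        # 8192
print(context_length("my-finetune"))  # raises ValueError
```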
[Diffs for the remaining changed files not shown.]
