Sync dev with main #369

Merged: 16 commits, Nov 14, 2023
6 changes: 4 additions & 2 deletions requirements-dev.txt
@@ -7,8 +7,10 @@ mypy>=0.990,<1.1.0; platform_machine != "aarch64" and python_version >= "3.7"
black==22.3.0
types-requests==2.28.11.16
# Prompting libraries needed for testing
langchain==0.0.302; python_version>="3.9"
openai>=0.27; python_version>="3.9"
langchain==0.0.331; python_version>="3.9"
# Workaround for LangChain bug: pin OpenAI version. To be removed after LangChain has been fixed - see
# https://github.com/langchain-ai/langchain/issues/12967.
openai>=0.27,<=0.28.1; python_version>="3.9"

# Necessary for running all local models on GPU.
transformers[sentencepiece]>=4.0.0
4 changes: 2 additions & 2 deletions setup.cfg
@@ -1,5 +1,5 @@
[metadata]
version = 0.6.2
version = 0.6.3
description = Integrating LLMs into structured NLP pipelines
author = Explosion
author_email = [email protected]
@@ -44,7 +44,7 @@ spacy_misc =

[options.extras_require]
langchain =
langchain==0.0.249
langchain==0.0.335
transformers =
torch>=1.13.1,<2.0
transformers>=4.28.1,<5.0
39 changes: 37 additions & 2 deletions spacy_llm/models/hf/base.py
@@ -27,12 +27,46 @@ def __init__(
inference_config (Dict[Any, Any]): HF config for model run.
"""
self._name = name if self.hf_account in name else f"{self.hf_account}/{name}"
self._config_init, self._config_run = self.compile_default_configs()
default_cfg_init, default_cfg_run = self.compile_default_configs()
self._config_init, self._config_run = default_cfg_init, default_cfg_run

if config_init:
self._config_init = {**self._config_init, **config_init}
if config_run:
self._config_run = {**self._config_run, **config_run}

# `device` and `device_map` are conflicting arguments - ensure they aren't both set.
if config_init:
# Case 1: both device and device_map explicitly set by user.
if "device" in config_init and "device_map" in config_init:
warnings.warn(
"`device` and `device_map` are conflicting arguments - don't set both. Dropping argument "
"`device`."
)
self._config_init.pop("device")
# Case 2: we have a CUDA GPU (and hence device="cuda:0" by default), but device_map is set by user.
elif "device" in default_cfg_init and "device_map" in config_init:
self._config_init.pop("device")
# Case 3: we don't have a CUDA GPU (and hence "device_map=auto" by default), but device is set by user.
elif "device_map" in default_cfg_init and "device" in config_init:
self._config_init.pop("device_map")

# Fetch proper torch.dtype, if specified.
if (
has_torch
and "torch_dtype" in self._config_init
and self._config_init["torch_dtype"] != "auto"
):
try:
self._config_init["torch_dtype"] = getattr(
torch, self._config_init["torch_dtype"]
)
except AttributeError as ex:
raise ValueError(
f"Invalid value {self._config_init['torch_dtype']} was specified for `torch_dtype`. "
f"Double-check you specified a valid dtype."
) from ex

# Init HF model.
HuggingFace.check_installation()
self._check_model()
@@ -89,7 +123,7 @@ def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
default_cfg_run: Dict[str, Any] = {}

if has_torch:
default_cfg_init["torch_dtype"] = torch.bfloat16
default_cfg_init["torch_dtype"] = "bfloat16"
if has_torch_cuda_gpu:
# this ensures it fails explicitly when GPU is not enabled or sufficient
default_cfg_init["device"] = "cuda:0"
@@ -106,6 +140,7 @@
"Install CUDA to load and run the LLM on the GPU, or install 'accelerate' to dynamically "
"distribute the LLM on the CPU or even the hard disk. The latter may be slow."
)

return default_cfg_init, default_cfg_run

@abc.abstractmethod
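For readers skimming the diff: the net effect of the base.py changes is that `torch_dtype` now travels through the config as a plain string (e.g. "bfloat16") and is only resolved to the real `torch.dtype` at init time, and that conflicting `device`/`device_map` settings are reconciled against the defaults. A minimal standalone sketch of that logic (the helper names `resolve_dtype` and `merge_device_config` are illustrative, not spacy-llm API):

```python
import warnings
from typing import Any, Dict, Optional

import torch


def resolve_dtype(cfg: Dict[str, Any]) -> Dict[str, Any]:
    # Turn a string like "bfloat16" into torch.bfloat16; "auto" is passed through.
    dtype = cfg.get("torch_dtype")
    if isinstance(dtype, str) and dtype != "auto":
        try:
            cfg["torch_dtype"] = getattr(torch, dtype)
        except AttributeError as ex:
            raise ValueError(f"Invalid value {dtype} was specified for `torch_dtype`.") from ex
    return cfg


def merge_device_config(
    default_init: Dict[str, Any], user_init: Optional[Dict[str, Any]]
) -> Dict[str, Any]:
    # Merge the user's config_init over the defaults, then drop whichever of
    # `device`/`device_map` loses the conflict, mirroring the three cases above.
    merged = {**default_init, **(user_init or {})}
    if user_init:
        if "device" in user_init and "device_map" in user_init:
            warnings.warn("`device` and `device_map` are conflicting arguments - dropping `device`.")
            merged.pop("device")
        elif "device" in default_init and "device_map" in user_init:
            merged.pop("device")
        elif "device_map" in default_init and "device" in user_init:
            merged.pop("device_map")
    return resolve_dtype(merged)


# With a CUDA default of device="cuda:0", a user-supplied device_map wins:
print(merge_device_config({"torch_dtype": "bfloat16", "device": "cuda:0"}, {"device_map": "auto"}))
# {'torch_dtype': torch.bfloat16, 'device_map': 'auto'}
```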
1 change: 0 additions & 1 deletion spacy_llm/models/hf/falcon.py
@@ -19,7 +19,6 @@ def __init__(
config_run: Optional[Dict[str, Any]],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._device: Optional[str] = None
super().__init__(name=name, config_init=config_init, config_run=config_run)

assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)
21 changes: 6 additions & 15 deletions spacy_llm/models/hf/mistral.py
@@ -1,4 +1,4 @@
from typing import Any, Callable, Dict, Iterable, Optional, Tuple
from typing import Any, Callable, Dict, Iterable, Optional

from confection import SimpleFrozenDict

@@ -17,7 +17,6 @@ def __init__(
config_run: Optional[Dict[str, Any]],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._device: Optional[str] = None
self._is_instruct = "instruct" in name
super().__init__(name=name, config_init=config_init, config_run=config_run)

@@ -33,14 +32,15 @@ def __init__(
def init_model(self) -> Any:
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
self._device = init_cfg.pop("device")
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg, resume_download=True
)
if self._device:
model.to(self._device)
if device:
model.to(device)

return model

@@ -61,8 +61,7 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[ove
).input_ids
for prompt in prompts
]
if self._device:
tokenized_input_ids = [tp.to(self._device) for tp in tokenized_input_ids]
tokenized_input_ids = [tp.to(self._model.device) for tp in tokenized_input_ids]

return [
self._tokenizer.decode(
Expand All @@ -74,14 +73,6 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[ove
for tok_ii in tokenized_input_ids
]

@staticmethod
def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
return (
default_cfg_init,
default_cfg_run,
)


@registry.llm_models("spacy.Mistral.v1")
def mistral_hf(
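The Mistral changes (and the matching OpenLLaMA/StableLM edits below) drop the stored `self._device` attribute: the device is only consumed once inside `init_model`, and at call time the tokenized prompts are moved to wherever the loaded model ended up via `model.device`. A rough sketch of that pattern with plain `transformers` (model name and prompt are placeholders; `device_map="auto"` additionally assumes `accelerate` is installed):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "mistralai/Mistral-7B-Instruct-v0.1"  # placeholder; any causal LM works
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name, device_map="auto")

prompt = "[INST] Summarize: spaCy is an NLP library. [/INST]"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
# No need to track the requested device separately - ask the model where it lives.
input_ids = input_ids.to(model.device)

generated = model.generate(input_ids, max_new_tokens=32)
# Strip the prompt tokens before decoding so only the completion is returned.
print(tokenizer.decode(generated[0][input_ids.shape[1]:], skip_special_tokens=True))
```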
19 changes: 10 additions & 9 deletions spacy_llm/models/hf/openllama.py
@@ -2,7 +2,7 @@

from confection import SimpleFrozenDict

from ...compat import Literal, torch, transformers
from ...compat import Literal, transformers
from ...registry.util import registry
from .base import HuggingFace

@@ -22,7 +22,6 @@ def __init__(
config_run: Optional[Dict[str, Any]],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._device: Optional[str] = None
super().__init__(name=name, config_init=config_init, config_run=config_run)

def init_model(self) -> "transformers.AutoModelForCausalLM":
@@ -32,14 +31,15 @@ def init_model(self) -> "transformers.AutoModelForCausalLM":
# Initialize tokenizer and model.
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
self._device = init_cfg.pop("device")
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg
)

if self._device:
model.to(self._device)
if device:
model.to(device)

return model

@@ -48,8 +48,9 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[ove
tokenized_input_ids = [
self._tokenizer(prompt, return_tensors="pt").input_ids for prompt in prompts
]
if self._device:
tokenized_input_ids = [tii.to(self._device) for tii in tokenized_input_ids]
tokenized_input_ids = [
tii.to(self._model.device) for tii in tokenized_input_ids
]

assert hasattr(self._model, "generate")
return [
@@ -71,7 +72,7 @@ def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
return (
{
**default_cfg_init,
"torch_dtype": torch.float16,
"torch_dtype": "float16",
},
{**default_cfg_run, "max_new_tokens": 32},
)
13 changes: 6 additions & 7 deletions spacy_llm/models/hf/stablelm.py
@@ -42,7 +42,6 @@ def __init__(
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._is_tuned = "tuned" in name
self._device: Optional[str] = None
super().__init__(name=name, config_init=config_init, config_run=config_run)

def init_model(self) -> "transformers.AutoModelForCausalLM":
@@ -51,14 +50,15 @@ def init_model(self) -> "transformers.AutoModelForCausalLM":
"""
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
self._device = init_cfg.pop("device")
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg
)

if self._device:
model.half().to(self._device)
if device:
model.half().to(device)

return model

@@ -80,8 +80,7 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[ove
]
)
]
if self._device:
tokenized_input_ids = [tp.to(self._device) for tp in tokenized_input_ids]
tokenized_input_ids = [tp.to(self._model.device) for tp in tokenized_input_ids]

assert hasattr(self._model, "generate")
return [
2 changes: 1 addition & 1 deletion spacy_llm/models/langchain/model.py
@@ -43,7 +43,7 @@ def get_type_to_cls_dict() -> Dict[
"""Returns langchain.llms.type_to_cls_dict.
RETURNS (Dict[str, Type[langchain.base_language.BaseLanguageModel]]): langchain.llms.type_to_cls_dict.
"""
return langchain.llms.type_to_cls_dict
return getattr(langchain.llms, "type_to_cls_dict")

def __call__(self, prompts: Iterable[Any]) -> Iterable[Any]:
"""Executes prompts on specified API.
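The one-line langchain/model.py change swaps a static attribute access for `getattr`, so the lookup happens (and can be guarded) at call time against whatever LangChain version is actually installed. A hedged sketch of the same idea with an explicit error message (assumes a LangChain 0.0.x install where `langchain.llms` is importable; the message text is illustrative, not what spacy-llm raises):

```python
import langchain.llms


def get_type_to_cls_dict():
    # Attribute lookup at call time: if the pinned LangChain version ever stops
    # exposing `llms.type_to_cls_dict`, this fails here with a clear message
    # instead of breaking at import time.
    try:
        return getattr(langchain.llms, "type_to_cls_dict")
    except AttributeError as ex:
        raise AttributeError(
            "The installed LangChain version does not expose llms.type_to_cls_dict; "
            "check the langchain pin in setup.cfg / requirements-dev.txt."
        ) from ex
```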
25 changes: 1 addition & 24 deletions spacy_llm/models/rest/anthropic/model.py
@@ -1,7 +1,7 @@
import os
import warnings
from enum import Enum
from typing import Any, Dict, Iterable, List, Sized, Tuple
from typing import Any, Dict, Iterable, List, Sized

import requests # type: ignore[import]
import srsly # type: ignore[import]
@@ -107,26 +107,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:

assert len(api_responses) == len(prompts)
return api_responses

@classmethod
def get_model_names(cls) -> Tuple[str, ...]:
return (
# claude-2
"claude-2",
"claude-2-100k",
# claude-1
"claude-1",
"claude-1-100k",
# claude-instant-1
"claude-instant-1",
"claude-instant-1-100k",
# claude-instant-1.1
"claude-instant-1.1",
"claude-instant-1.1-100k",
# claude-1.3
"claude-1.3",
"claude-1.3-100k",
# others
"claude-1.0",
"claude-1.2",
)
11 changes: 1 addition & 10 deletions spacy_llm/models/rest/azure/model.py
@@ -1,7 +1,7 @@
import os
import warnings
from enum import Enum
from typing import Any, Dict, Iterable, List, Sized, Tuple
from typing import Any, Dict, Iterable, List, Sized

import requests # type: ignore[import]
import srsly # type: ignore[import]
@@ -147,12 +147,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
api_responses.append(response.get("text", srsly.json_dumps(response)))

return api_responses

@classmethod
def get_model_names(cls) -> Tuple[str, ...]:
# We treat the deployment name as "model name", hence it can be arbitrary.
return ("",)

def _check_model(self) -> None:
# We treat the deployment name as "model name", hence it can be arbitrary.
pass
17 changes: 1 addition & 16 deletions spacy_llm/models/rest/base.py
@@ -1,7 +1,7 @@
import abc
import time
from enum import Enum
from typing import Any, Callable, Dict, Iterable, Optional, Tuple
from typing import Any, Callable, Dict, Iterable, Optional

import requests # type: ignore
from requests import ConnectTimeout, ReadTimeout
@@ -61,30 +61,15 @@ def __init__(
assert self._interval > 0
assert self._max_request_time > 0

self._check_model()
self._verify_auth()

def _check_model(self) -> None:
"""Checks whether model is supported. Raises if it isn't."""
if self._name not in self.get_model_names():
raise ValueError(
f"Model '{self._name}' is not supported - select one of {self.get_model_names()} instead"
)

@abc.abstractmethod
def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
"""Executes prompts on specified API.
prompts (Iterable[str]): Prompts to execute.
RETURNS (Iterable[str]): API responses.
"""

@classmethod
@abc.abstractmethod
def get_model_names(cls) -> Tuple[str, ...]:
"""Names of supported models.
RETURNS (Tuple[str]): Names of supported models.
"""

@property
@abc.abstractmethod
def credentials(self) -> Dict[str, str]:
8 changes: 2 additions & 6 deletions spacy_llm/models/rest/cohere/model.py
@@ -1,7 +1,7 @@
import os
import warnings
from enum import Enum
from typing import Any, Dict, Iterable, List, Sized, Tuple
from typing import Any, Dict, Iterable, List, Sized

import requests # type: ignore[import]
import srsly # type: ignore[import]
@@ -54,7 +54,7 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
call_method=requests.post,
url=self._endpoint,
headers=headers,
json={**json_data, **self._config},
json={**json_data, **self._config, "model": self._name},
timeout=self._max_request_time,
)
try:
@@ -111,7 +111,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
else:
api_responses.append(srsly.json_dumps(response))
return api_responses

@classmethod
def get_model_names(cls) -> Tuple[str, ...]:
return "command", "command-light", "command-light-nightly", "command-nightly"