Support Phi-2 #410

Draft · wants to merge 7 commits into main
Changes from 2 commits
4 changes: 4 additions & 0 deletions spacy_llm/models/hf/__init__.py
@@ -3,7 +3,9 @@
from .falcon import falcon_hf
from .llama2 import llama2_hf
from .mistral import mistral_hf
from .mixtral import mixtral_hf
from .openllama import openllama_hf
from .phi2 import phi2_hf
from .stablelm import stablelm_hf

__all__ = [
@@ -12,6 +14,8 @@
"falcon_hf",
"llama2_hf",
"mistral_hf",
"mixtral_hf",
"openllama_hf",
"phi2_hf",
"stablelm_hf",
]
8 changes: 8 additions & 0 deletions spacy_llm/models/hf/base.py
@@ -69,6 +69,14 @@ def __init__(
f"Double-check you specified a valid dtype."
) from ex

# Recognize boolean attributes passed in as strings.
for key, value in self._config_init.items():
    if value in ("True", "False"):
        self._config_init[key] = value == "True"
for key, value in self._config_run.items():
    if value in ("True", "False"):
        self._config_run[key] = value == "True"

# Init HF model.
HuggingFace.check_installation()
self._check_model()
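
For reference, a minimal sketch (not part of this PR) of what the coercion above does when boolean config values arrive as the strings "True"/"False" from an INI-style config; the dictionary contents here are hypothetical:

# Hypothetical config dict as it might be parsed from a [components.llm.model] block.
raw_cfg = {"trust_remote_code": "True", "low_cpu_mem_usage": "False", "device_map": "auto"}
# Same normalization the hunk above applies to _config_init and _config_run.
coerced = {
    key: (value == "True") if value in ("True", "False") else value
    for key, value in raw_cfg.items()
}
assert coerced == {"trust_remote_code": True, "low_cpu_mem_usage": False, "device_map": "auto"}
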
7 changes: 3 additions & 4 deletions spacy_llm/models/hf/mistral.py
@@ -65,7 +65,7 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:

tokenized_input_ids = [
self._tokenizer(
prompt if not self._is_instruct else f"<s>[INST] {prompt} [/INST]",
prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
return_tensors="pt",
).input_ids
for prompt in prompts_for_doc
@@ -96,11 +96,10 @@ def mistral_hf(
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Mistral instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Falcon model. Has to be one of Falcon.get_model_names().
name (Literal): Name of the Mistral model. Has to be one of Mistral.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): Falcon instance that can execute a set of prompts and return
the raw responses.
RETURNS (Mistral): Mistral instance that can execute a set of prompts and return the raw responses.
"""
return Mistral(
name=name, config_init=config_init, config_run=config_run, context_length=8000
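
A small illustration of the prompt change above: only the "[INST] ... [/INST]" wrapper is kept for instruct variants, and the manual "<s>" prefix is dropped (presumably because the tokenizer already inserts the BOS token when encoding the prompt):

prompt = "What is spaCy?"
# Behaviour after this diff:
wrapped = f"[INST] {prompt} [/INST]"
# Previous behaviour, with the BOS token spelled out by hand:
# wrapped = f"<s>[INST] {prompt} [/INST]"
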
108 changes: 108 additions & 0 deletions spacy_llm/models/hf/mixtral.py
@@ -0,0 +1,108 @@
from typing import Any, Callable, Dict, Iterable, List, Optional

from confection import SimpleFrozenDict

from ...compat import Literal, transformers
from ...registry.util import registry
from .base import HuggingFace


class Mixtral(HuggingFace):
MODEL_NAMES = Literal[
"Mixtral-8x7B-v0.1", "Mixtral-8x7B-Instruct-v0.1"
] # noqa: F722

def __init__(
self,
name: MODEL_NAMES,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._is_instruct = "instruct" in name
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)

assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)

# Instantiate GenerationConfig object from config dict.
self._hf_config_run = transformers.GenerationConfig.from_pretrained(
self._name, **self._config_run
)
# To avoid deprecation warning regarding usage of `max_length`.
self._hf_config_run.max_new_tokens = self._hf_config_run.max_length

def init_model(self) -> Any:
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg, resume_download=True
)
if device:
model.to(device)

return model

@property
def hf_account(self) -> str:
return "mistralai"

def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
assert callable(self._tokenizer)
assert hasattr(self._model, "generate")
assert hasattr(self._tokenizer, "batch_decode")
responses: List[List[str]] = []

for prompts_for_doc in prompts:
prompts_for_doc = list(prompts_for_doc)

tokenized_input_ids = [
self._tokenizer(
prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
return_tensors="pt",
).input_ids
for prompt in prompts_for_doc
]
tokenized_input_ids = [
tp.to(self._model.device) for tp in tokenized_input_ids
]

responses.append(
[
self._tokenizer.decode(
self._model.generate(
input_ids=tok_ii, generation_config=self._hf_config_run
)[:, tok_ii.shape[1] :][0],
skip_special_tokens=True,
)
for tok_ii in tokenized_input_ids
]
)

return responses


@registry.llm_models("spacy.Mixtral.v1")
def mixtral_hf(
name: Mixtral.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Mixtral instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Mixtral model. Has to be one of Mixtral.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Mixtral): Mixtral instance that can execute a set of prompts and return the raw responses.
"""
return Mixtral(
name=name, config_init=config_init, config_run=config_run, context_length=8000
)
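
A usage sketch for the new "spacy.Mixtral.v1" entry, mirroring the configuration used in the tests below; running it requires a CUDA GPU with enough memory for Mixtral-8x7B:

import spacy

nlp = spacy.blank("en")
nlp.add_pipe(
    "llm",
    config={
        "task": {"@llm_tasks": "spacy.NoOp.v1"},
        "model": {
            "@llm_models": "spacy.Mixtral.v1",
            "name": "Mixtral-8x7B-Instruct-v0.1",
        },
    },
)
doc = nlp("This is a test.")
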
115 changes: 115 additions & 0 deletions spacy_llm/models/hf/phi2.py
@@ -0,0 +1,115 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

from confection import SimpleFrozenDict

from ...compat import Literal, transformers
from ...registry.util import registry
from .base import HuggingFace


class Phi2(HuggingFace):
MODEL_NAMES = Literal["phi-2"] # noqa: F722

def __init__(
self,
name: MODEL_NAMES,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)

def init_model(self) -> "transformers.AutoModelForCausalLM":
"""Sets up HF model and needed utilities.
RETURNS (Any): HF model.
"""
# Initialize tokenizer and model.
self._tokenizer = transformers.AutoTokenizer.from_pretrained(
self._name, trust_remote_code=True
)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg
)
if device:
model.to(device)

return model

def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
assert callable(self._tokenizer)
responses: List[List[str]] = []

for prompts_for_doc in prompts:
tokenized_input_ids = [
self._tokenizer(
prompt, return_tensors="pt", return_attention_mask=False
).input_ids
for prompt in prompts_for_doc
]
tokenized_input_ids = [
tii.to(self._model.device) for tii in tokenized_input_ids
]

assert hasattr(self._model, "generate")
responses.append(
[
self._tokenizer.decode(
self._model.generate(input_ids=tii, **self._config_run)[
:, tii.shape[1] :
][0],
)
for tii in tokenized_input_ids
]
)

return responses

@property
def hf_account(self) -> str:
return "microsoft"

@staticmethod
def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
# See https://huggingface.co/microsoft/phi-2#sample-code for recommended setting combinations.
default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
return (
{
**default_cfg_init,
"torch_dtype": "auto",
"device_map": "cuda",
"trust_remote_code": True,
},
{
**default_cfg_run,
"max_new_tokens": 200,
},
)


@registry.llm_models("spacy.Phi-2.v1")
def phi2_hf(
name: Phi2.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates OpenLLaMA instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the OpenLLaMA model. Has to be one of OpenLLaMA.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): OpenLLaMA instance that can execute a set of prompts and return
the raw responses.
"""
return Phi2(
name=name, config_init=config_init, config_run=config_run, context_length=2048
)
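
The Phi-2 factory can be wired up the same way; a sketch assuming the NoOp task, with the registry string "spacy.Phi-2.v1" and model name "phi-2" taken from the code above (context length is capped at 2048 tokens):

import spacy

nlp = spacy.blank("en")
nlp.add_pipe(
    "llm",
    config={
        "task": {"@llm_tasks": "spacy.NoOp.v1"},
        "model": {
            "@llm_models": "spacy.Phi-2.v1",
            "name": "phi-2",
        },
    },
)
doc = nlp("This is a test.")
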
70 changes: 70 additions & 0 deletions spacy_llm/tests/models/test_mixtral.py
@@ -0,0 +1,70 @@
import copy

import pytest
import spacy
from confection import Config # type: ignore[import]
from thinc.compat import has_torch_cuda_gpu

from ...compat import torch

_PIPE_CFG = {
"model": {
"@llm_models": "spacy.Mixtral.v1",
"name": "Mixtral-8x7B-Instruct-v0.1",
},
"task": {"@llm_tasks": "spacy.NoOp.v1"},
}

_NLP_CONFIG = """

[nlp]
lang = "en"
pipeline = ["llm"]
batch_size = 128

[components]

[components.llm]
factory = "llm"

[components.llm.task]
@llm_tasks = "spacy.NoOp.v1"

[components.llm.model]
@llm_models = "spacy.Mixtral.v1"
name = "Mixtral-8x7B-Instruct-v0.1.1"
"""


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_init():
"""Test initialization and simple run."""
nlp = spacy.blank("en")
cfg = copy.deepcopy(_PIPE_CFG)
nlp.add_pipe("llm", config=cfg)
nlp("This is a test.")
torch.cuda.empty_cache()


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_init_from_config():
orig_config = Config().from_str(_NLP_CONFIG)
nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True)
assert nlp.pipe_names == ["llm"]
torch.cuda.empty_cache()


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_invalid_model():
orig_config = Config().from_str(_NLP_CONFIG)
config = copy.deepcopy(orig_config)
config["components"]["llm"]["model"]["name"] = "x"
with pytest.raises(ValueError, match="unexpected value; permitted"):
spacy.util.load_model_from_config(config, auto_fill=True)
torch.cuda.empty_cache()