diff --git a/spacy_llm/models/hf/__init__.py b/spacy_llm/models/hf/__init__.py
index b3afbb71..f7414101 100644
--- a/spacy_llm/models/hf/__init__.py
+++ b/spacy_llm/models/hf/__init__.py
@@ -4,6 +4,7 @@
 from .llama2 import llama2_hf
 from .mistral import mistral_hf
 from .openllama import openllama_hf
+from .phi2 import phi2_hf
 from .stablelm import stablelm_hf
 
 __all__ = [
@@ -13,5 +14,6 @@
     "llama2_hf",
     "mistral_hf",
     "openllama_hf",
+    "phi2_hf",
     "stablelm_hf",
 ]
diff --git a/spacy_llm/models/hf/base.py b/spacy_llm/models/hf/base.py
index b8f8b7b7..7232321c 100644
--- a/spacy_llm/models/hf/base.py
+++ b/spacy_llm/models/hf/base.py
@@ -69,6 +69,14 @@ def __init__(
                     f"Double-check you specified a valid dtype."
                 ) from ex
 
+        # Recognize boolean attributes.
+        for key, value in self._config_init.items():
+            if value in ("True", "False"):
+                self._config_init[key] = False if value == "False" else True
+        for key, value in self._config_run.items():
+            if value in ("True", "False"):
+                self._config_run[key] = False if value == "False" else True
+
         # Init HF model.
         HuggingFace.check_installation()
         self._check_model()
diff --git a/spacy_llm/models/hf/mistral.py b/spacy_llm/models/hf/mistral.py
index c80d636e..d6a13d92 100644
--- a/spacy_llm/models/hf/mistral.py
+++ b/spacy_llm/models/hf/mistral.py
@@ -65,7 +65,7 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:
 
             tokenized_input_ids = [
                 self._tokenizer(
-                    prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
+                    prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
                     return_tensors="pt",
                 ).input_ids
                 for prompt in prompts_for_doc
diff --git a/spacy_llm/models/hf/phi2.py b/spacy_llm/models/hf/phi2.py
new file mode 100644
index 00000000..be195347
--- /dev/null
+++ b/spacy_llm/models/hf/phi2.py
@@ -0,0 +1,115 @@
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
+
+from confection import SimpleFrozenDict
+
+from ...compat import Literal, transformers
+from ...registry.util import registry
+from .base import HuggingFace
+
+
+class Phi2(HuggingFace):
+    MODEL_NAMES = Literal["phi-2"]  # noqa: F722
+
+    def __init__(
+        self,
+        name: str,
+        config_init: Optional[Dict[str, Any]],
+        config_run: Optional[Dict[str, Any]],
+        context_length: Optional[int],
+    ):
+        self._tokenizer: Optional["transformers.AutoTokenizer"] = None
+        super().__init__(
+            name=name,
+            config_init=config_init,
+            config_run=config_run,
+            context_length=context_length,
+        )
+
+    def init_model(self) -> "transformers.AutoModelForCausalLM":
+        """Sets up HF model and needed utilities.
+        RETURNS (Any): HF model.
+        """
+        # Initialize tokenizer and model.
+        self._tokenizer = transformers.AutoTokenizer.from_pretrained(
+            self._name, trust_remote_code=True
+        )
+        init_cfg = self._config_init
+        device: Optional[str] = None
+        if "device" in init_cfg:
+            device = init_cfg.pop("device")
+
+        model = transformers.AutoModelForCausalLM.from_pretrained(
+            self._name, **init_cfg
+        )
+        if device:
+            model.to(device)
+
+        return model
+
+    def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:  # type: ignore[override]
+        assert callable(self._tokenizer)
+        responses: List[List[str]] = []
+
+        for prompts_for_doc in prompts:
+            tokenized_input_ids = [
+                self._tokenizer(
+                    prompt, return_tensors="pt", return_attention_mask=False
+                ).input_ids
+                for prompt in prompts_for_doc
+            ]
+            tokenized_input_ids = [
+                tii.to(self._model.device) for tii in tokenized_input_ids
+            ]
+
+            assert hasattr(self._model, "generate")
+            responses.append(
+                [
+                    self._tokenizer.decode(
+                        self._model.generate(input_ids=tii, **self._config_run)[
+                            :, tii.shape[1] :
+                        ][0],
+                    )
+                    for tii in tokenized_input_ids
+                ]
+            )
+
+        return responses
+
+    @property
+    def hf_account(self) -> str:
+        return "microsoft"
+
+    @staticmethod
+    def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        # See https://huggingface.co/microsoft/phi-2#sample-code for recommended setting combinations.
+        default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
+        return (
+            {
+                **default_cfg_init,
+                "torch_dtype": "auto",
+                "device_map": "cuda",
+                "trust_remote_code": True,
+            },
+            {
+                **default_cfg_run,
+                "max_new_tokens": 200,
+            },
+        )
+
+
+@registry.llm_models("spacy.Phi-2.v1")
+def phi2_hf(
+    name: Phi2.MODEL_NAMES,
+    config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
+    config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
+) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
+    """Generates Phi-2 instance that can execute a set of prompts and return the raw responses.
+    name (Literal): Name of the Phi-2 model. Has to be one of Phi2.get_model_names().
+    config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
+    config_run (Optional[Dict[str, Any]]): HF config for running the model.
+    RETURNS (Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]): Phi-2 instance that can execute a set of
+        prompts and return the raw responses.
+    """
+    return Phi2(
+        name=name, config_init=config_init, config_run=config_run, context_length=2048
+    )
diff --git a/spacy_llm/tests/models/test_phi2.py b/spacy_llm/tests/models/test_phi2.py
new file mode 100644
index 00000000..771b0462
--- /dev/null
+++ b/spacy_llm/tests/models/test_phi2.py
@@ -0,0 +1,85 @@
+import copy
+
+import pytest
+import spacy
+from confection import Config  # type: ignore[import]
+from thinc.compat import has_torch_cuda_gpu
+
+from ...compat import torch
+
+_PIPE_CFG = {
+    "model": {
+        "@llm_models": "spacy.Phi-2.v1",
+        "name": "phi-2",
+    },
+    "task": {"@llm_tasks": "spacy.NoOp.v1"},
+    "save_io": True,
+}
+
+_NLP_CONFIG = """
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+save_io = True
+
+[components.llm.task]
+@llm_tasks = "spacy.NoOp.v1"
+
+[components.llm.model]
+@llm_models = "spacy.Phi-2.v1"
+name = "phi-2"
+"""
+
+
+@pytest.mark.gpu
+@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
+def test_init():
+    """Test initialization and simple run."""
+    nlp = spacy.blank("en")
+    nlp.add_pipe("llm", config=_PIPE_CFG)
+    doc = nlp("This is a test.")
+    torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"][0].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"][0]
+    )
+
+
+@pytest.mark.gpu
+@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
+def test_init_with_set_config():
+    """Test initialization and simple run with changed config."""
+    nlp = spacy.blank("en")
+    cfg = copy.deepcopy(_PIPE_CFG)
+    cfg["model"]["config_run"] = {"max_new_tokens": 32}
+    nlp.add_pipe("llm", config=cfg)
+    doc = nlp("This is a test.")
+    torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"][0].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"][0]
+    )
+
+
+@pytest.mark.gpu
+@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
+def test_init_from_config():
+    orig_config = Config().from_str(_NLP_CONFIG)
+    nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True)
+    assert nlp.pipe_names == ["llm"]
+    torch.cuda.empty_cache()
+
+
+@pytest.mark.gpu
+@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
+def test_invalid_model():
+    orig_config = Config().from_str(_NLP_CONFIG)
+    config = copy.deepcopy(orig_config)
+    config["components"]["llm"]["model"]["name"] = "anything-else"
+    with pytest.raises(ValueError, match="unexpected value; permitted"):
+        spacy.util.load_model_from_config(config, auto_fill=True)
+    torch.cuda.empty_cache()