From 75e9ac279a508f3972745a54f351560f1fe916c0 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Fri, 8 Sep 2023 10:30:12 +0200
Subject: [PATCH 1/4] Release 0.5.0 (#287)

* bump to 0.5.0
* fix references to backend
* update streamlit example
* remove redundant field parameter from parse_responses
* fix typo in test examples
* update Dolly textcat task version to v3
* migration guide, explaining CoT NER
* fix details element
* fix typo
* Add accuracy explanation
* small edit
* remove newline
* prettier
---
 migration_guide.md                          | 116 ++++++++++++++++++--
 setup.cfg                                   |   2 +-
 spacy_llm/pipeline/llm.py                   |   2 +-
 spacy_llm/tasks/sentiment/parser.py         |   1 -
 spacy_llm/ty.py                             |   2 +-
 usage_examples/streamlit/streamlit_app.py   |  16 +--
 usage_examples/tests/test_usage_examples.py |   4 +-
 usage_examples/textcat_dolly/fewshot.cfg    |   2 +-
 usage_examples/textcat_dolly/zeroshot.cfg   |   2 +-
 9 files changed, 124 insertions(+), 23 deletions(-)

diff --git a/migration_guide.md b/migration_guide.md
index 622a26fd..612bcf8f 100644
--- a/migration_guide.md
+++ b/migration_guide.md
@@ -1,20 +1,113 @@
 # Migration guides
 
+<details open>
+  <summary>0.4.x to 0.5.x</summary>
+
+## `0.4.x` to `0.5.x`
+
+`0.5.x` includes internal refactoring that should have minimal to zero impact on the user experience.
+In most cases, code and config files from `0.4.x` should just work on `0.5.x`.
+
+### New Chain-of-Thought NER prompting
+
+We've implemented Chain-of-Thought (CoT) prompting for SpanCat and NER tasks,
+based on the
+[PromptNER paper](https://arxiv.org/pdf/2305.15444.pdf) by Ashok and Lipton
+(2023). This implementation is available as `spacy.SpanCat.v3` and `spacy.NER.v3`.
+On an internal use-case, we've found this implementation to be much more accurate
+than the `v1` and `v2` versions - with an increase in F-score of up to 15
+percentage points.
+
+For `v3`, zero-shot prompting should remain pretty much the same, though behind the scenes,
+a dummy prompt example will be used by the CoT implementation.
+For few-shot learning, the examples need to be provided in a slightly
+[different format](https://spacy.io/api/large-language-models#ner) than in the `v1` and `v2` versions.
+
+First, you can provide an explicit `description` of what entities should look like.
+
+In `0.4.x`:
+
+```ini
+[components.llm.task]
+@llm_tasks = "spacy.NER.v2"
+labels = ["DISH", "INGREDIENT", "EQUIPMENT"]
+```
+
+In `0.5.x`:
+
+```ini
+[components.llm.task]
+@llm_tasks = "spacy.NER.v3"
+labels = ["DISH", "INGREDIENT", "EQUIPMENT"]
+description = Entities are the names of food dishes,
+    ingredients, and any kind of cooking equipment.
+    Adjectives, verbs, adverbs are not entities.
+    Pronouns are not entities.
+```
+
+Further, the examples for few-shot learning look different: you can now include both positive and negative examples
+using the new fields `is_entity` and `reason`.
+
+In `0.4.x`:
+
+```json
+[
+  {
+    "text": "You can't get a great chocolate flavor with carob.",
+    "entities": {
+      "INGREDIENT": ["carob"]
+    }
+  },
+  ...
+]
+```
+
+In `0.5.x`:
+
+```json
+[
+  {
+    "text": "You can't get a great chocolate flavor with carob.",
+    "spans": [
+      {
+        "text": "chocolate",
+        "is_entity": false,
+        "label": "==NONE==",
+        "reason": "is a flavor in this context, not an ingredient"
+      },
+      {
+        "text": "carob",
+        "is_entity": true,
+        "label": "INGREDIENT",
+        "reason": "is an ingredient to add chocolate flavor"
+      }
+    ]
+  },
+  ...
+]
+```
+
+For a full example using 0.5.0 with Chain-of-Thought prompting for NER, see
+[this usage example](https://github.com/explosion/spacy-llm/tree/main/usage_examples/ner_v3_openai).
+
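+To try the new task end to end, the sketch below assembles a pipeline and prints
+the predicted entities. This is a minimal, illustrative snippet - it assumes the
+`spacy.NER.v3` config above is saved as `config.cfg` (a placeholder name) and
+that an OpenAI API key is available in the environment:
+
+```python
+import os
+
+from spacy_llm.util import assemble
+
+# REST-backed models read the API key from the environment.
+assert os.getenv("OPENAI_API_KEY"), "Set OPENAI_API_KEY before assembling."
+
+# Build the pipeline from the 0.5.x config shown above.
+nlp = assemble("config.cfg")
+
+doc = nlp("Sriracha sauce goes really well with hoisin stir fry.")
+print([(ent.text, ent.label_) for ent in doc.ents])
+```
+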
+</details>
+
+<details>
+  <summary>0.3.x to 0.4.x</summary>
+
 ## `0.3.x` to `0.4.x`
 
-`0.4.x` significantly refactors the code to make it more robust and the config more intuitive. 0.4.0 changes the config
+`0.4.x` significantly refactors the code to make it more robust and the config more intuitive. 0.4.0 changes the config
 paradigm from `backend`- to `model`-centric. This is reflected in the external API in a different config structure.
 
 Remember that there are three different types of models: the first uses the native REST implementation to communicate
 with hosted LLMs, the second builds on HuggingFace's `transformers` model to run models locally and the third leverages
-`langchain` to operate on hosted or local models. While the config for all three is rather similar (especially in
+`langchain` to operate on hosted or local models. While the config for all three is rather similar (especially in
 0.4.x), there are differences in how these models have to be configured. We show how to migrate your config from 0.3.x
 to 0.4.x for each of these model types.
 
-### All model types
+### All model types
+
 - The registry name has changed - instead of `@llm_backends`, use `@llm_models`.
 - The `api` attribute has been removed.
@@ -24,41 +117,48 @@ This is the default method to communicate with hosted models. Whenever you don't
 (see section at the bottom) or run models locally, you are using this kind of model.
 
 In `0.3.x`:
+
 ```ini
 [components.llm.backend]
 @llm_backends = "spacy.REST.v1"
 api = "OpenAI"
 config = {"model": "gpt-3.5-turbo", "temperature": 0.3}
 ```
+
 In `0.4.x`:
+
 ```ini
 [components.llm.model]
 @llm_models = "spacy.GPT-3-5.v1"
 name = "gpt-3.5-turbo"
 config = {"temperature": 0.3}
 ```
-Note that the factory function (marked with `@`) refers to the name of the model. Variants of the same model can be
+
+Note that the factory function (marked with `@`) refers to the name of the model. Variants of the same model can be
 specified with the `name` attribute - for `gpt-3.5` this could be `"gpt-3.5-turbo"` or `"gpt-3.5-turbo-16k"`.
 
 ### Models using HuggingFace
 
-On top of the changes described in the section above, HF models like `spacy.Dolly.v1` now accept `config_init` and
+On top of the changes described in the section above, HF models like `spacy.Dolly.v1` now accept `config_init` and
 `config_run` to reflect that different arguments can be passed at init or run time.
 
 In `0.3.x`:
+
 ```ini
 [components.llm.backend]
 @llm_backends = "spacy.Dolly_HF.v1"
 model = "databricks/dolly-v2-3b"
 config = {}
 ```
+
 In `0.4.x`:
+
 ```ini
 [components.llm.model]
 @llm_models = "spacy.Dolly.v1"
 name = "dolly-v2-3b" # or databricks/dolly-v2-3b - the prefix is optional
 config_init = {} # Arguments passed to HF model at initialization time
-config_run = {} # Arguments passed to HF model at inference time
+config_run = {} # Arguments passed to HF model at inference time
 ```
 
 ### Models using LangChain
 
@@ -67,6 +167,7 @@ LangChain models are now accessible via `langchain.[API].[version]`, e.g. `lang
 changes from 0.3.x to 0.4.x are identical to those for REST-based models.
 
 In `0.3.x`:
+
 ```ini
 [components.llm.backend]
 @llm_backends = "spacy.LangChain.v1"
 api = "OpenAI"
 config = {"temperature": 0.3}
 ```
 
 In `0.4.x`:
+
 ```ini
 [components.llm.model]
 @llm_models = "langchain.OpenAI.v1"
 name = "gpt-3.5-turbo"
 config = {"temperature": 0.3}
 ```
 
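+After migrating a config from the `backend` to the `model` style, a quick sanity
+check is to assemble the pipeline once: the signatures of the configured task and
+model are validated at that point. A minimal sketch, where `migrated.cfg` is a
+placeholder for your updated config file:
+
+```python
+from spacy_llm.util import assemble
+
+# Assembling runs config validation plus the task/model type-consistency check.
+nlp = assemble("migrated.cfg")
+print(nlp.pipe_names)  # should include "llm"
+```
+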
-</details>
\ No newline at end of file
+</details>
diff --git a/setup.cfg b/setup.cfg
index 58e29676..a44361e5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-version = 0.4.3
+version = 0.5.0
 description = Integrating LLMs into structured NLP pipelines
 author = Explosion
 author_email = contact@explosion.ai
diff --git a/spacy_llm/pipeline/llm.py b/spacy_llm/pipeline/llm.py
index b48db6ae..1c3365d4 100644
--- a/spacy_llm/pipeline/llm.py
+++ b/spacy_llm/pipeline/llm.py
@@ -73,7 +73,7 @@ def make_llm(
     model (Callable[[Iterable[Any]], Iterable[Any]]]): Callable querying the specified LLM API.
     cache (Cache): Cache to use for caching prompts and responses per doc (batch).
     save_io (bool): Whether to save LLM I/O (prompts and responses) in the `Doc._.llm_io` custom extension.
-    validate_types (bool): Whether to check if signatures of configured backend and task are consistent.
+    validate_types (bool): Whether to check if signatures of configured model and task are consistent.
     """
     if task is None:
         raise ValueError(
diff --git a/spacy_llm/tasks/sentiment/parser.py b/spacy_llm/tasks/sentiment/parser.py
index 52082742..8365dab0 100644
--- a/spacy_llm/tasks/sentiment/parser.py
+++ b/spacy_llm/tasks/sentiment/parser.py
@@ -12,7 +12,6 @@ def parse_responses_v1(
     task (SentimentTask): Task instance.
     docs (Iterable[Doc]): Corresponding Doc instances.
     responses (Iterable[str]): LLM responses.
-    field (str): Field to store responses in.
     RETURNS (Iterable[Optional[float]]): Sentiment score per doc/response. None on parsing error.
     """
     for prompt_response in responses:
diff --git a/spacy_llm/ty.py b/spacy_llm/ty.py
index c984df9a..0673b1d0 100644
--- a/spacy_llm/ty.py
+++ b/spacy_llm/ty.py
@@ -243,7 +243,7 @@ def _extract_model_call_signature(model: PromptExecutorType) -> Dict[str, Any]:
 def validate_type_consistency(task: LLMTask, model: PromptExecutorType) -> None:
     """Check whether the types of the task and model signatures match.
     task (LLMTask): Specified task.
-    backend (PromptExecutor): Specified model.
+    model (PromptExecutor): Specified model.
     """
     # Raises an error or prints a warning if something looks wrong/odd.
     if not isinstance(task, LLMTask):
diff --git a/usage_examples/streamlit/streamlit_app.py b/usage_examples/streamlit/streamlit_app.py
index f08527be..b312deb9 100644
--- a/usage_examples/streamlit/streamlit_app.py
+++ b/usage_examples/streamlit/streamlit_app.py
@@ -19,10 +19,10 @@
 [components.llm]
 factory = "llm"
 
-[components.llm.backend]
-@llm_models = "spacy.REST.v1"
-api = "OpenAI"
-config = {"model": "gpt-3.5-turbo", "temperature": 0.0}
+[components.llm.model]
+@llm_models = "spacy.GPT-3-5.v2"
+name = "gpt-3.5-turbo"
+config = {"temperature": 0.0}
 
 [components.llm.task]
 @llm_tasks = "spacy.NER.v2"
@@ -44,10 +44,10 @@
 [components.llm]
 factory = "llm"
 
-[components.llm.backend]
-@llm_models = "spacy.REST.v1"
-api = "OpenAI"
-config = {"model": "gpt-3.5-turbo", "temperature": 0.0}
+[components.llm.model]
+@llm_models = "spacy.GPT-3-5.v2"
+name = "gpt-3.5-turbo"
+config = {"temperature": 0.0}
 
 [components.llm.task]
 @llm_tasks = "spacy.TextCat.v2"
diff --git a/usage_examples/tests/test_usage_examples.py b/usage_examples/tests/test_usage_examples.py
index 4185f4c2..b3305519 100644
--- a/usage_examples/tests/test_usage_examples.py
+++ b/usage_examples/tests/test_usage_examples.py
@@ -31,10 +31,10 @@ def test_ner_dolly(config_name: str):
 @pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
 @pytest.mark.parametrize("config_name", ("fewshot.cfg", "zeroshot.cfg"))
 def test_textcat_dolly(config_name: str):
-    """Test NER Dolly usage example.
+    """Test Textcat Dolly usage example.
     config_name (str): Name of config file to use.
     """
-    path = _USAGE_EXAMPLE_PATH / "textcat_openai"
+    path = _USAGE_EXAMPLE_PATH / "textcat_dolly"
     textcat_openai.run_pipeline(
         text="text",
         config_path=path / config_name,
diff --git a/usage_examples/textcat_dolly/fewshot.cfg b/usage_examples/textcat_dolly/fewshot.cfg
index 25dda2a1..2ece55d1 100644
--- a/usage_examples/textcat_dolly/fewshot.cfg
+++ b/usage_examples/textcat_dolly/fewshot.cfg
@@ -16,7 +16,7 @@ factory = "llm"
 name = "dolly-v2-3b"
 
 [components.llm.task]
-@llm_tasks = "spacy.TextCat.v2"
+@llm_tasks = "spacy.TextCat.v3"
 labels = COMPLIMENT,INSULT
 exclusive_classes = true
diff --git a/usage_examples/textcat_dolly/zeroshot.cfg b/usage_examples/textcat_dolly/zeroshot.cfg
index 76e87114..a31c5cd3 100644
--- a/usage_examples/textcat_dolly/zeroshot.cfg
+++ b/usage_examples/textcat_dolly/zeroshot.cfg
@@ -13,7 +13,7 @@ factory = "llm"
 name = "dolly-v2-3b"
 
 [components.llm.task]
-@llm_tasks = "spacy.TextCat.v2"
+@llm_tasks = "spacy.TextCat.v3"
 labels = COMPLIMENT,INSULT
 examples = null
 exclusive_classes = true

From b1bf960db7b8f6efd476ab1aff69fac8a24b8547 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Fri, 8 Sep 2023 11:01:14 +0200
Subject: [PATCH 2/4] Simplify usage example in readme (#288)

* Simplify usage example in readme

* Update README.md

Co-authored-by: Raphael Mitsch

---------

Co-authored-by: Raphael Mitsch
---
 README.md | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index d87e1978..fc646b62 100644
--- a/README.md
+++ b/README.md
@@ -57,11 +57,7 @@ python -m pip install spacy-llm
 
 ## 🐍 Quickstart
 
-The task and the model have to be supplied to the `llm` pipeline component using [spaCy's config
-system](https://spacy.io/api/data-formats#config).
-
-Let's run some text classification using a GPT-4 model from OpenAI. If you're using hosted APIs (as opposed to local
-models like Falcon, Dolly or LLaMA), ensure to that your API keys are set as environmental variables.
+Let's run some text classification using a GPT model from OpenAI. Create a new API key from openai.com or fetch an existing one, and ensure the keys are set as environment variables. For more background information, see
 
@@ -88,8 +84,9 @@ as well as the default GPT-3-5 model from OpenAI.
 
 ### Using a config file
 
-To get more control over the various parameters of the `llm` pipeline,
-create a config file `config.cfg` containing at least the following (or see the
+To control the various parameters of the `llm` pipeline, we can use
+[spaCy's config system](https://spacy.io/api/data-formats#config).
+To start, create a config file `config.cfg` containing at least the following (or see the
 full example
 [here](https://github.com/explosion/spacy-llm/tree/main/usage_examples/textcat_openai)):

From 2d3d77e00c5ae79a7a601b538c9c07b6e16a9622 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Mon, 11 Sep 2023 09:33:20 +0200
Subject: [PATCH 3/4] pin confection to at least 0.1.3 (#290)

---
 requirements.txt | 1 +
 setup.cfg        | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 99a880d2..64732698 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 spacy>=3.5.0,<4.0.0
+confection>=0.1.3,<1.0.0
 jinja2
 pytest>=5.2.0,!=7.1.0
 pytest-timeout>=1.3.0,<2.0.0
diff --git a/setup.cfg b/setup.cfg
index a44361e5..7363261a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,6 +34,7 @@ python_requires = >=3.6
 install_requires =
     spacy>=3.5,<4.0
     jinja2
+    confection>=0.1.3,<1.0.0
 
 [options.entry_points]
 spacy_factories =

From 8e9a11b914f248f9ba6400d46e396f78514b25b9 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch
Date: Mon, 11 Sep 2023 10:42:56 +0200
Subject: [PATCH 4/4] Bump version. (#291)

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 7363261a..e76f0ed4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-version = 0.5.0
+version = 0.5.1
 description = Integrating LLMs into structured NLP pipelines
 author = Explosion
 author_email = contact@explosion.ai
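To check which spacy-llm release is installed after applying these patches, a quick sanity check (a minimal sketch using only the Python 3.8+ standard library):

```python
from importlib.metadata import version

# Prints the installed spacy-llm version - expected to be "0.5.1" after PATCH 4/4.
print(version("spacy-llm"))
```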