Added Amazon Bedrock Titan Example

explosion · Oct 24, 2023 · a133124 · a133124
1 parent eb6ba41
commit a133124
Show file tree

Hide file tree

Showing 5 changed files with 176 additions and 0 deletions.
diff --git a/usage_examples/ner_v3_titan/README.md b/usage_examples/ner_v3_titan/README.md
@@ -0,0 +1,76 @@
+# Using Titan Express Model from Amazon Bedrock for Named Entity Recognition (NER)
+
+
+This example shows how you can use the Titan Express model from Amazon Bedrock for Named Entity Recognition (NER).
+The NER prompt is based on the [PromptNER](https://arxiv.org/abs/2305.15444) paper and
+utilizes Chain-of-Thought reasoning to extract named entities.
+
+First, create new credentials in the AWS Console.
+Record the access key ID, secret access key, and session token, and make sure
+they are available as environment variables:
+
+```sh
+export AWS_ACCESS_KEY_ID=""
+export AWS_SECRET_ACCESS_KEY=""
+export AWS_SESSION_TOKEN=""
+```
+
+Then, you can run the pipeline on a sample text via:
+
+
+```sh
+python run_pipeline.py [TEXT] [PATH TO CONFIG] [PATH TO FILE WITH EXAMPLES]
+```
+
+For example:
+
+```sh
+python run_pipeline.py \
+    "Sriracha sauce goes really well with hoisin stir fry, but you should add it after you use the wok." \
+    ./fewshot.cfg \
+    ./examples.json
+```
+
+This example assigns labels for DISH, INGREDIENT, and EQUIPMENT.
+
+You can change around the labels and examples for your use case.
+You can find the few-shot examples in the
+`examples.json` file. Feel free to change and update it to your liking.
+We also support other file formats, including `yml` and `jsonl` for these examples.
+
+
+### Negative examples
+
+While not required, the Chain-of-Thought reasoning for the `spacy.NER.v3` task
+works best in our experience when both positive and negative examples are provided.
+
+This prompts the language model with concrete examples of what **is not** an entity
+for your use case.
+
+Here's an example that helps define the INGREDIENT label for the LLM.
+
+```json
+[
+    {
+        "text": "You can't get a great chocolate flavor with carob.",
+        "spans": [
+            {
+                "text": "chocolate",
+                "is_entity": false,
+                "label": "==NONE==",
+                "reason": "is a flavor in this context, not an ingredient"
+            },
+            {
+                "text": "carob",
+                "is_entity": true,
+                "label": "INGREDIENT",
+                "reason": "is an ingredient to add chocolate flavor"
+            }
+        ]
+    }
+    ...
+]
+```
+
+In this example, "chocolate" is not an ingredient even though it could be in other contexts.
+We explain that via the "reason" property of this example.
diff --git a/usage_examples/ner_v3_titan/__init__.py b/usage_examples/ner_v3_titan/__init__.py
@@ -0,0 +1,3 @@
+from .run_pipeline import run_pipeline
+
+__all__ = ["run_pipeline"]
diff --git a/usage_examples/ner_v3_titan/examples.json b/usage_examples/ner_v3_titan/examples.json
@@ -0,0 +1,36 @@
+[
+  {
+    "text": "You can't get a great chocolate flavor with carob.",
+    "spans": [
+      {
+        "text": "chocolate",
+        "is_entity": false,
+        "label": "==NONE==",
+        "reason": "is a flavor in this context, not an ingredient"
+      },
+      {
+        "text": "carob",
+        "is_entity": true,
+        "label": "INGREDIENT",
+        "reason": "is an ingredient to add chocolate flavor"
+      }
+    ]
+  },
+  {
+    "text": "You can probably sand-blast it if it's an anodized aluminum pan",
+    "spans": [
+      {
+        "text": "sand-blast",
+        "is_entity": false,
+        "label": "==NONE==",
+        "reason": "is a cleaning technique, not some kind of equipment"
+      },
+      {
+        "text": "anodized aluminum pan",
+        "is_entity": true,
+        "label": "EQUIPMENT",
+        "reason": "is a piece of cooking equipment, anodized is included since it describes the type of pan"
+      }
+    ]
+  }
+]
diff --git a/usage_examples/ner_v3_titan/fewshot.cfg b/usage_examples/ner_v3_titan/fewshot.cfg
@@ -0,0 +1,32 @@
+[paths]
+examples = null
+
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v3"
+labels = ["DISH", "INGREDIENT", "EQUIPMENT"]
+description = Entities are the names food dishes,
+    ingredients, and any kind of cooking equipment.
+    Adjectives, verbs, adverbs are not entities.
+    Pronouns are not entities.
+
+[components.llm.task.label_definitions]
+DISH = "Known food dishes, e.g. Lobster Ravioli, garlic bread"
+INGREDIENT = "Individual parts of a food dish, including herbs and spices."
+EQUIPMENT = "Any kind of cooking equipment. e.g. oven, cooking pot, grill"
+
+[components.llm.task.examples]
+@misc = "spacy.FewShotReader.v1"
+path = "${paths.examples}"
+
+[components.llm.model]
+@llm_models = "spacy.Bedrock.Titan.Express.v1"
+region = us-east-1
diff --git a/usage_examples/ner_v3_titan/run_pipeline.py b/usage_examples/ner_v3_titan/run_pipeline.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import typer
+from wasabi import msg
+
+from spacy_llm.util import assemble
+
+Arg = typer.Argument
+Opt = typer.Option
+
+
+def run_pipeline(  # CLI entry point (handed to typer.run below): run the configured pipeline on `text` and print the entities
+    # fmt: off
+    text: str = Arg("", help="Text to perform Named Entity Recognition on."),
+    config_path: Path = Arg(..., help="Path to the configuration file to use."),
+    examples_path: Path = Arg(..., help="Path to the examples file to use."),
+    verbose: bool = Opt(False, "--verbose", "-v", help="Show extra information."),
+    # fmt: on
+):
+    msg.text(f"Loading config from {config_path}", show=verbose)  # gated on the --verbose flag via show=
+    nlp = assemble(config_path, overrides={"paths.examples": str(examples_path)})  # override the config's `paths.examples` value with the given file
+    doc = nlp(text)
+
+    msg.text(f"Text: {doc.text}")
+    msg.text(f"Entities: {[(ent.text, ent.label_) for ent in doc.ents]}")  # one (text, label) pair per entity in doc.ents
+
+
+if __name__ == "__main__":
+    typer.run(run_pipeline)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .run_pipeline import run_pipeline

		__all__ = ["run_pipeline"]