MIPROv2 KNN #1888

Open · wants to merge 39 commits into base: bootstrap-knn-few-shot-with-random-search
Commits (39)
627afb9
Parallel Bootstrap
CyrusNuevoDia Nov 25, 2024
254c8e5
Merge branch 'bootstrap-knn-few-shot-with-random-search' into paralle…
CyrusNuevoDia Nov 25, 2024
7816ba3
dspy.streamify
CyrusNuevoDia Nov 29, 2024
d8bc33c
Update docs
CyrusNuevoDia Nov 29, 2024
48a2376
Merge branch 'main' into streaming
CyrusNuevoDia Nov 29, 2024
ab8b28e
Fix ruff lint error
CyrusNuevoDia Nov 29, 2024
7c251a9
Bring back send_stream to settings
CyrusNuevoDia Nov 29, 2024
00c6e76
Improve doc
CyrusNuevoDia Nov 29, 2024
c02a7fc
Bring back request_cache setting
CyrusNuevoDia Nov 29, 2024
707e5a3
sse => streaming_response
CyrusNuevoDia Nov 29, 2024
e852b4c
Simplify dsp.utils.settings diff
CyrusNuevoDia Nov 29, 2024
1e8dcf8
WIP
CyrusNuevoDia Nov 30, 2024
8cc7a15
Merge branch 'bootstrap-knn-few-shot-with-random-search' into miprov2…
CyrusNuevoDia Dec 1, 2024
bf58b36
Fixed MIPROv2KNN bug
CyrusNuevoDia Dec 2, 2024
d4064e1
Add load/dump to LRUCache + drop callable request params
CyrusNuevoDia Dec 2, 2024
60a95bc
Merge branch 'streaming' into miprov2-knn
CyrusNuevoDia Dec 2, 2024
e87afcd
Remove testing results.csv
CyrusNuevoDia Dec 3, 2024
3111b12
ujson => pickle for dump/load
CyrusNuevoDia Dec 3, 2024
dc2b06e
EOD
CyrusNuevoDia Dec 3, 2024
26814a9
Parallel proposer
CyrusNuevoDia Dec 3, 2024
a698611
Remove double logger init
CyrusNuevoDia Dec 3, 2024
64a926e
Refactor + caching dspy.Embedder
CyrusNuevoDia Dec 4, 2024
2386fc3
Add lazy vectorization to dspy.KNN
CyrusNuevoDia Dec 4, 2024
ce59dc0
Parallel GroundedProposer
CyrusNuevoDia Dec 4, 2024
9082927
Update BootstrapKNNRS to use lazy KNN
CyrusNuevoDia Dec 4, 2024
acaba56
Improve progress logging for Bootstrap*
CyrusNuevoDia Dec 4, 2024
a50a9f5
Simplify OptimizerTester
CyrusNuevoDia Dec 4, 2024
1da5c4f
Formatting and pass through num_threads to GroundedProposer
CyrusNuevoDia Dec 4, 2024
fe4c5c9
MIPROv2KNN works!
CyrusNuevoDia Dec 4, 2024
c5b9c50
Merge branch 'bootstrap-knn-few-shot-with-random-search' into miprov2…
CyrusNuevoDia Dec 4, 2024
b049a4c
Fix lazy loading for KNN
CyrusNuevoDia Dec 5, 2024
b0a64f9
Make num_candidate_programs actually = num_candidate_programs
CyrusNuevoDia Dec 5, 2024
5074b13
Clarify embedding log
CyrusNuevoDia Dec 5, 2024
96844ef
Remove unnecessary import
CyrusNuevoDia Dec 5, 2024
2e651a0
Improve printing
CyrusNuevoDia Dec 5, 2024
5156f72
Add 2-module HoVeR task
CyrusNuevoDia Dec 6, 2024
9a42713
Answer exact match and semantic f1
CyrusNuevoDia Dec 18, 2024
a50e1ef
Merge branch 'bootstrap-knn-few-shot-with-random-search' into miprov2…
CyrusNuevoDia Dec 18, 2024
0337e74
Typed DemoCandidate
CyrusNuevoDia Dec 18, 2024
1 change: 1 addition & 0 deletions .gitignore
@@ -16,6 +16,7 @@
 /ScoNe/
 testing/outputs/
 testing/playbook.ipynb
+testing/outputs/
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
142 changes: 142 additions & 0 deletions dsp/utils/settings.py
@@ -0,0 +1,142 @@
import copy
import threading
from contextlib import contextmanager
from dsp.utils.utils import dotdict

DEFAULT_CONFIG = dotdict(
    lm=None,
    adapter=None,
    rm=None,
    branch_idx=0,
    reranker=None,
    compiled_lm=None,
    force_reuse_cached_compilation=False,
    compiling=False,
    skip_logprobs=False,
    trace=[],
    release=0,
    bypass_assert=False,
    bypass_suggest=False,
    assert_failures=0,
    suggest_failures=0,
    langchain_history=[],
    experimental=False,
    backoff_time=10,
    callbacks=[],
    async_max_workers=8,
    request_cache=None,
    send_stream=None,
)

# Global base configuration
main_thread_config = copy.deepcopy(DEFAULT_CONFIG)


class ThreadLocalOverrides(threading.local):
    def __init__(self):
        self.overrides = dotdict()  # Initialize thread-local overrides


# Create the thread-local storage
thread_local_overrides = ThreadLocalOverrides()


class Settings:
    """
    A singleton class for DSPy configuration settings.

    This is thread-safe. User threads are supported both through ParallelExecutor and native threading.
    - If native threading is used, the thread inherits the initial config from the main thread.
    - If ParallelExecutor is used, the thread inherits the initial config from its parent thread.
    """

    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance.lock = threading.Lock()  # maintained here for DSPy assertions.py
        return cls._instance

    def __getattr__(self, name):
        overrides = getattr(thread_local_overrides, 'overrides', dotdict())
        if name in overrides:
            return overrides[name]
        elif name in main_thread_config:
            return main_thread_config[name]
        else:
            raise AttributeError(f"'Settings' object has no attribute '{name}'")

    def __setattr__(self, name, value):
        if name in ('_instance',):
            super().__setattr__(name, value)
        else:
            self.configure(**{name: value})

    # Dictionary-like access

    def __getitem__(self, key):
        return self.__getattr__(key)

    def __setitem__(self, key, value):
        self.__setattr__(key, value)

    def __contains__(self, key):
        overrides = getattr(thread_local_overrides, 'overrides', dotdict())
        return key in overrides or key in main_thread_config

    def get(self, key, default=None):
        try:
            return self[key]
        except AttributeError:
            return default

    def copy(self):
        overrides = getattr(thread_local_overrides, 'overrides', dotdict())
        return dotdict({**main_thread_config, **overrides})

    @property
    def config(self):
        config = self.copy()
        if 'lock' in config:
            del config['lock']
        return config

    # Configuration methods

    def configure(self, **kwargs):
        global main_thread_config

        # Get or initialize thread-local overrides
        overrides = getattr(thread_local_overrides, 'overrides', dotdict())
        thread_local_overrides.overrides = dotdict(
            {**copy.deepcopy(DEFAULT_CONFIG), **main_thread_config, **overrides, **kwargs}
        )

        # Update main_thread_config, in the main thread only
        if threading.current_thread() is threading.main_thread():
            main_thread_config = thread_local_overrides.overrides

    @contextmanager
    def context(self, **kwargs):
        """Context manager for temporary configuration changes."""
        global main_thread_config
        original_overrides = getattr(thread_local_overrides, 'overrides', dotdict()).copy()
        original_main_thread_config = main_thread_config.copy()

        self.configure(**kwargs)
        try:
            yield
        finally:
            thread_local_overrides.overrides = original_overrides

            if threading.current_thread() is threading.main_thread():
                main_thread_config = original_main_thread_config

    def __repr__(self):
        overrides = getattr(thread_local_overrides, 'overrides', dotdict())
        combined_config = {**main_thread_config, **overrides}
        return repr(combined_config)


settings = Settings()
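
A minimal sketch of the thread-inheritance rules described in the `Settings` docstring, using the `configure`/`context` aliases exported from `dspy/__init__.py` below (the LM name is a placeholder, not part of this diff):

```python
import threading

import dspy

lm = dspy.LM("openai/gpt-4o-mini")  # placeholder model name
dspy.configure(lm=lm)  # main thread: updates the global main_thread_config


def worker():
    # A native thread starts with empty overrides, so lookups fall back to
    # the main thread's config.
    assert dspy.settings.lm is lm

    # Overrides made inside `context` are thread-local and restored on exit,
    # without touching main_thread_config from a non-main thread.
    with dspy.settings.context(experimental=True):
        assert dspy.settings.experimental is True
    assert dspy.settings.experimental is False


t = threading.Thread(target=worker)
t.start()
t.join()
```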
3 changes: 2 additions & 1 deletion dspy/__init__.py
@@ -5,7 +5,6 @@
 from dspy.teleprompt import *
 
 import dspy.retrievers
-import dspy.teleprompt
 
 from dspy.evaluate import Evaluate  # isort: skip
 from dspy.clients import *  # isort: skip
@@ -25,6 +24,7 @@
 configure = settings.configure
 context = settings.context
 
+import dspy.teleprompt
 
 LabeledFewShot = dspy.teleprompt.LabeledFewShot
 BootstrapFewShot = dspy.teleprompt.BootstrapFewShot
@@ -36,4 +36,5 @@
 BetterTogether = dspy.teleprompt.BetterTogether
 COPRO = dspy.teleprompt.COPRO
 MIPROv2 = dspy.teleprompt.MIPROv2
+MIPROv2KNN = dspy.teleprompt.MIPROv2KNN
 Ensemble = dspy.teleprompt.Ensemble
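
With the new `MIPROv2KNN` export, a hedged usage sketch; its constructor and `compile` signature are not shown in this diff, so the MIPROv2-style arguments below are assumptions to verify against `dspy/teleprompt`:

```python
import dspy


def exact_match(example, pred, trace=None):
    # Toy metric; assumes examples and predictions expose an `answer` field.
    return example.answer == pred.answer


# Assumed MIPROv2-like interface.
optimizer = dspy.MIPROv2KNN(metric=exact_match)
# compiled = optimizer.compile(student_program, trainset=trainset)
```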
14 changes: 8 additions & 6 deletions dspy/clients/__init__.py
@@ -1,11 +1,13 @@
-from dspy.clients.lm import LM
-from dspy.clients.provider import Provider, TrainingJob
-from dspy.clients.base_lm import BaseLM, inspect_history
-from dspy.clients.embedding import Embedder
-import litellm
-import os
 from pathlib import Path
+import os
 
 from litellm.caching import Cache
+import litellm
+
+from dspy.clients.base_lm import BaseLM, inspect_history
+from dspy.clients.embedding import Embedder
+from dspy.clients.lm import LM
+from dspy.clients.provider import Provider, TrainingJob
+
 DISK_CACHE_DIR = os.environ.get("DSPY_CACHEDIR") or os.path.join(Path.home(), ".dspy_cache")
 DISK_CACHE_LIMIT = int(os.environ.get("DSPY_CACHE_LIMIT", 3e10))  # 30 GB default
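
The cache location and size cap come from the two environment variables read above; a small sketch with illustrative values (they must be set before `dspy` is imported, since the module reads them at import time):

```python
import os

os.environ["DSPY_CACHEDIR"] = "/tmp/dspy_cache"       # default: ~/.dspy_cache
os.environ["DSPY_CACHE_LIMIT"] = str(5_000_000_000)   # 5 GB instead of the 30 GB default

import dspy  # noqa: E402 -- must come after the overrides above
```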
85 changes: 43 additions & 42 deletions dspy/clients/embedding.py
@@ -1,6 +1,9 @@
+from typing import Callable, List, Optional, Union
 import litellm
 import numpy as np
 
+from .lm import request_cache
+
 
 class Embedder:
     """DSPy embedding class.
@@ -56,13 +59,28 @@ def my_embedder(texts):
     ```
     """
 
-    def __init__(self, model, batch_size=200, caching=True, **kwargs):
+    def __init__(self, model: Union[str, Callable], batch_size=200, **kwargs):
+        if not isinstance(model, str) and not callable(model):
+            raise ValueError(f"`model` in `dspy.Embedder` must be a string or a callable, but got {type(model)}.")
+
         self.model = model
         self.batch_size = batch_size
-        self.caching = caching
         self.default_kwargs = kwargs
 
-    def __call__(self, inputs, batch_size=None, caching=None, **kwargs):
+    def _embed(self, inputs: List[str], cache: bool, **kwargs):
+        if callable(self.model):
+            return self.model(inputs, **kwargs)
+
+        response = litellm_embedding({"model": self.model, "input": inputs, **kwargs}).data
+        return [data["embedding"] for data in response]
+
+    def __call__(
+        self,
+        inputs: Union[str, List[str]],
+        batch_size: Optional[int] = None,
+        cache: Optional[bool] = None,
+        **kwargs,
+    ) -> np.ndarray:
         """Compute embeddings for the given inputs.
 
         Args:
@@ -76,46 +94,29 @@ def __call__(self, inputs, batch_size=None, caching=None, **kwargs):
         If the input is a list of strings, returns a 2D numpy array of embeddings, one embedding per row.
         """
 
-        if isinstance(inputs, str):
-            is_single_input = True
+        multi_input = isinstance(inputs, list)
+        if not multi_input:
             inputs = [inputs]
-        else:
-            is_single_input = False
 
         assert all(isinstance(inp, str) for inp in inputs), "All inputs must be strings."
 
-        if batch_size is None:
-            batch_size = self.batch_size
-        if caching is None:
-            caching = self.caching
-
-        merged_kwargs = self.default_kwargs.copy()
-        merged_kwargs.update(kwargs)
-
-        embeddings_list = []
-
-        def chunk(inputs_list, size):
-            for i in range(0, len(inputs_list), size):
-                yield inputs_list[i : i + size]
-
-        for batch_inputs in chunk(inputs, batch_size):
-            if isinstance(self.model, str):
-                embedding_response = litellm.embedding(
-                    model=self.model, input=batch_inputs, caching=caching, **merged_kwargs
-                )
-                batch_embeddings = [data["embedding"] for data in embedding_response.data]
-            elif callable(self.model):
-                batch_embeddings = self.model(batch_inputs, **merged_kwargs)
-            else:
-                raise ValueError(
-                    f"`model` in `dspy.Embedder` must be a string or a callable, but got {type(self.model)}."
-                )
-
-            embeddings_list.extend(batch_embeddings)
-
-        embeddings = np.array(embeddings_list, dtype=np.float32)
-
-        if is_single_input:
-            return embeddings[0]
-        else:
-            return embeddings
+        batch_size = batch_size or self.batch_size
+        kwargs = {**self.default_kwargs, **kwargs}
+
+        embeddings = flatten([self._embed(c, cache, **kwargs) for c in chunk(inputs, batch_size)])
+        embeddings = embeddings if multi_input else embeddings[0]
+        return np.array(embeddings, dtype=np.float32)
+
+
+def chunk(inputs_list, size):
+    for i in range(0, len(inputs_list), size):
+        yield inputs_list[i : i + size]
+
+
+def flatten(list_of_lists):
+    return [item for sublist in list_of_lists for item in sublist]
+
+
+@request_cache(maxsize=None)
+def litellm_embedding(request):
+    return litellm.embedding(**request, cache={"no-cache": False, "no-store": False})
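
A usage sketch for the refactored `Embedder`; the litellm model name is illustrative, and the callable variant is handy for tests since `_embed` bypasses litellm (and its request cache) on that path:

```python
import numpy as np

import dspy

# String model: inputs are chunked by batch_size and embedded via litellm.
embedder = dspy.Embedder("openai/text-embedding-3-small", batch_size=100)
single = embedder("hello world")       # 1D float32 array
batch = embedder(["hello", "world"])   # 2D float32 array, one row per input

# Callable model: skips litellm entirely.
fake = dspy.Embedder(lambda texts, **kwargs: np.zeros((len(texts), 8)))
assert fake(["a", "b"]).shape == (2, 8)
```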
23 changes: 16 additions & 7 deletions dspy/evaluate/auto_evaluation.py
@@ -45,13 +45,15 @@ def __init__(self, threshold=0.66, decompositional=False):
         self.module = dspy.ChainOfThought(SemanticRecallPrecision)
 
     def forward(self, example, pred, trace=None):
-        scores = self.module(question=example.question, ground_truth=example.response, system_response=pred.response)
+        ground_truth = example.response if hasattr(example, "response") else getattr(example, "answer", None)
+        system_response = pred.response if hasattr(pred, "response") else getattr(pred, "answer", None)
+
+        scores = self.module(question=example.question, ground_truth=ground_truth, system_response=system_response)
         score = f1_score(scores.precision, scores.recall)
 
         return score if trace is None else score >= self.threshold
 
-
 
 ###########
 
 
@@ -70,7 +72,6 @@ class AnswerCompleteness(dspy.Signature):
     completeness: float = dspy.OutputField(desc="fraction (out of 1.0) of ground truth covered by the system response")
 
-
 
 class AnswerGroundedness(dspy.Signature):
     """
     Estimate the groundedness of a system's responses, against real retrieved documents written by people.
@@ -81,9 +82,13 @@ class AnswerGroundedness(dspy.Signature):
     question: str = dspy.InputField()
     retrieved_context: str = dspy.InputField()
     system_response: str = dspy.InputField()
-    system_response_claims: str = dspy.OutputField(desc="enumeration of non-trivial or check-worthy claims in the system response")
+    system_response_claims: str = dspy.OutputField(
+        desc="enumeration of non-trivial or check-worthy claims in the system response"
+    )
     discussion: str = dspy.OutputField(desc="discussion of how supported the claims are by the retrieved context")
-    groundedness: float = dspy.OutputField(desc="fraction (out of 1.0) of system response supported by the retrieved context")
+    groundedness: float = dspy.OutputField(
+        desc="fraction (out of 1.0) of system response supported by the retrieved context"
+    )
 
 
 class CompleteAndGrounded(dspy.Module):
@@ -93,8 +98,12 @@ def __init__(self, threshold=0.66):
         self.groundedness_module = dspy.ChainOfThought(AnswerGroundedness)
 
     def forward(self, example, pred, trace=None):
-        completeness = self.completeness_module(question=example.question, ground_truth=example.response, system_response=pred.response)
-        groundedness = self.groundedness_module(question=example.question, retrieved_context=pred.context, system_response=pred.response)
+        completeness = self.completeness_module(
+            question=example.question, ground_truth=example.response, system_response=pred.response
+        )
+        groundedness = self.groundedness_module(
+            question=example.question, retrieved_context=pred.context, system_response=pred.response
+        )
         score = f1_score(groundedness.groundedness, completeness.completeness)
 
         return score if trace is None else score >= self.threshold
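
Both metrics above reduce their two sub-scores with `f1_score`; per its use elsewhere in `dspy.evaluate`, that is the harmonic mean with a division-by-zero guard, sketched here under that assumption:

```python
def f1_score(precision: float, recall: float) -> float:
    # Harmonic mean; 0.0 when both inputs are 0 to avoid division by zero.
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)


# CompleteAndGrounded-style combination: the score is high only when the
# response is both well grounded and complete.
assert round(f1_score(0.8, 0.5), 3) == 0.615
```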
3 changes: 0 additions & 3 deletions dspy/evaluate/evaluate.py
@@ -38,9 +38,6 @@ def HTML(x: str) -> str:
 logger = logging.getLogger(__name__)
 
-
-logger = logging.getLogger(__name__)
-
 
 class Evaluate:
     def __init__(
         self,