Skip to content

Commit

Permalink
Merge branch 'main' into snoopy
Browse files Browse the repository at this point in the history
  • Loading branch information
CyrusNuevoDia committed Nov 25, 2024
2 parents c9d268b + 1b10e23 commit 0c1f989
Show file tree
Hide file tree
Showing 24 changed files with 1,040 additions and 791 deletions.
2 changes: 1 addition & 1 deletion docs/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ BootstrapFS on MATH with a tiny LM like Llama-3.2 with Ollama (maybe with a big

## 3) **DSPy's Ecosystem** advances open-source AI research.

Compared to working on or with monolithic LMs, DSPy's modular paradigm aims to enable a large community to improve the compositional architectures, inference-time strategies, and optimizers for LM programs in an open, distributed way. It gives you more control, helps you iterate much faster, and allows your programs to get better over time by applying the latest optimizers or modules.
Compared to monolithic LMs, DSPy's modular paradigm enables a large community to improve the compositional architectures, inference-time strategies, and optimizers for LM programs in an open, distributed way. This gives DSPy users more control, helps them iterate much faster, and allows their programs to get better over time by applying the latest optimizers or modules.

The DSPy research effort started at Stanford NLP in Feb 2022, building on what we learned from developing early [compound LM systems](https://bair.berkeley.edu/blog/2024/02/18/compound-ai-systems/) like [ColBERT-QA](https://arxiv.org/abs/2007.00814), [Baleen](https://arxiv.org/abs/2101.00436), and [Hindsight](https://arxiv.org/abs/2110.07752). The first version was released as [DSP](https://arxiv.org/abs/2212.14024) in Dec 2022 and evolved by Oct 2023 into [DSPy](https://arxiv.org/abs/2310.03714). Thanks to [250 contributors](https://github.com/stanfordnlp/dspy/graphs/contributors), DSPy has introduced tens of thousands of people to building and optimizing modular LM programs.

Expand Down
1 change: 0 additions & 1 deletion docs/docs/learn/programming/signatures.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ The 21-year-old Lee made seven appearances and scored one goal for West Ham last

Many DSPy modules (except `dspy.Predict`) return auxiliary information by expanding your signature under the hood.

For example, `dspy.ChainOfThought` also adds a `reasoning` field that includes the LM's reasoning before it generates the output `summary`.
For example, `dspy.ChainOfThought` also adds a `reasoning` field that includes the LM's reasoning before it generates the output `summary`.

```python
Expand Down
323 changes: 40 additions & 283 deletions docs/docs/tutorials/entity_extraction/index.ipynb

Large diffs are not rendered by default.

483 changes: 278 additions & 205 deletions docs/docs/tutorials/rag/index.ipynb

Large diffs are not rendered by default.

133 changes: 77 additions & 56 deletions dsp/utils/settings.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import copy
import threading
from contextlib import contextmanager
from copy import deepcopy

from contextlib import contextmanager
from contextvars import ContextVar
from dsp.utils.utils import dotdict

DEFAULT_CONFIG = dotdict(
Expand All @@ -27,85 +28,105 @@
async_max_workers=8,
)

# Global base configuration.
# Starts as a private deep copy of DEFAULT_CONFIG so the defaults themselves are
# never modified. Settings falls back to this mapping for any key that the
# current context does not override, and rebinds this name (rather than mutating
# the mapping in place) when configuration changes on the main thread.
main_thread_config = copy.deepcopy(DEFAULT_CONFIG)

# Context-local configuration overrides; the default is an empty dotdict.
# NOTE: the default object is a single instance shared by every context that has
# not set the variable. Settings only ever replaces the value via `.set()` with a
# freshly built dotdict, so the shared default is not mutated in place.
dspy_ctx_overrides = ContextVar('dspy_ctx_overrides', default=dotdict())


class Settings:
    """
    A singleton class for DSPy configuration settings.

    This is thread-safe. User threads are supported both through ParallelExecutor and native threading.
    - If native threading is used, the thread inherits the initial config from the main thread.
    - If ParallelExecutor is used, the thread inherits the initial config from its parent thread.
      (ParallelExecutor is not defined in this module; this is the original author's note.)

    Lookup order for any key:
      1. the context-local overrides held in ``dspy_ctx_overrides``;
      2. the module-level ``main_thread_config``;
      3. otherwise ``AttributeError`` is raised.

    Assigning an attribute or item on the instance is equivalent to calling
    ``configure(name=value)``.
    """

    _instance = None

    def __new__(cls):
        """
        Singleton Pattern. See https://python-patterns.guide/gang-of-four/singleton/
        """
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Instance assignment is routed through __setattr__, so the lock ends
            # up stored as the `lock` configuration key rather than in __dict__.
            cls._instance.lock = threading.Lock()  # maintained here for DSPy assertions.py
        return cls._instance

    # TODO: remove first-class support for re-ranker and potentially combine with RM to form a pipeline of sorts
    # eg: RetrieveThenRerankPipeline(RetrievalModel, Reranker)
    # downstream operations like dsp.retrieve would use configs from the defined pipeline.

    def __getattr__(self, name):
        """Resolve ``name`` from the context overrides, then the main-thread config.

        Only invoked when normal attribute lookup fails, so methods and
        properties defined on the class are never shadowed by config keys.

        Raises:
            AttributeError: if ``name`` is present in neither mapping.
        """
        overrides = dspy_ctx_overrides.get()
        if name in overrides:
            return overrides[name]
        if name in main_thread_config:
            return main_thread_config[name]
        raise AttributeError(f"'Settings' object has no attribute '{name}'")

    def __setattr__(self, name, value):
        """Treat every assignment except ``_instance`` as a configuration update."""
        if name in ('_instance',):
            super().__setattr__(name, value)
        else:
            self.configure(**{name: value})

    # Dictionary-like access

    def __getitem__(self, key):
        """Return the setting ``key``; raises AttributeError (not KeyError) if missing."""
        return self.__getattr__(key)

    def __setitem__(self, key, value):
        """Set the setting ``key``; same effect as attribute assignment."""
        self.__setattr__(key, value)

    def __contains__(self, key):
        """Return True if ``key`` is set in the context overrides or the main-thread config."""
        overrides = dspy_ctx_overrides.get()
        return key in overrides or key in main_thread_config

    def get(self, key, default=None):
        """Return the setting ``key``, or ``default`` when it is not configured."""
        try:
            return self[key]
        except AttributeError:
            # __getitem__ delegates to __getattr__, which signals a missing key
            # with AttributeError.
            return default

    def copy(self):
        """Return a new dotdict of the effective settings (overrides win over the base)."""
        overrides = dspy_ctx_overrides.get()
        return dotdict({**main_thread_config, **overrides})

    @property
    def config(self):
        """Effective settings as a new dotdict, without the internal ``lock`` entry."""
        merged = self.copy()
        # The lock is only present if it was configured in a context visible from
        # here; tolerate its absence instead of raising KeyError.
        merged.pop('lock', None)
        return merged

    # Configuration methods

    def configure(self, return_token=False, **kwargs):
        """Apply ``kwargs`` as settings for the current context.

        The new override mapping is built from, in increasing priority: a deep
        copy of DEFAULT_CONFIG, the main-thread config, the current context
        overrides, and ``kwargs``. When called on the main thread, the
        module-level ``main_thread_config`` is rebound to the same mapping.

        Args:
            return_token: when true, return the ``ContextVar`` token produced by
                setting the overrides, so the caller can later reset them.
            **kwargs: settings to set.

        Returns:
            The token if ``return_token`` is true, otherwise ``None``.
        """
        global main_thread_config
        overrides = dspy_ctx_overrides.get()
        new_overrides = dotdict({**copy.deepcopy(DEFAULT_CONFIG), **main_thread_config, **overrides, **kwargs})
        token = dspy_ctx_overrides.set(new_overrides)

        # Update main_thread_config, in the main thread only
        if threading.current_thread() is threading.main_thread():
            main_thread_config = new_overrides

        if return_token:
            return token

    @contextmanager
    def context(self, **kwargs):
        """Context manager for temporary configuration changes."""
        global main_thread_config
        token = self.configure(return_token=True, **kwargs)
        try:
            yield
        finally:
            # Restore the overrides that were active before entering the block.
            dspy_ctx_overrides.reset(token)

            # configure() rebinds main_thread_config on the main thread, so it
            # has to be rebuilt from the restored overrides as well.
            if threading.current_thread() is threading.main_thread():
                main_thread_config = dotdict({**copy.deepcopy(DEFAULT_CONFIG), **dspy_ctx_overrides.get()})

    def __repr__(self):
        """Return the repr of the merged settings as a plain dict."""
        overrides = dspy_ctx_overrides.get()
        combined_config = {**main_thread_config, **overrides}
        return repr(combined_config)


# Module-level singleton instance used as the shared settings object.
settings = Settings()
2 changes: 2 additions & 0 deletions dspy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from .retrieve import *
from .signatures import *

import dspy.retrievers

# Functional must be imported after primitives, predict and signatures
from .functional import * # isort: skip
from dspy.evaluate import Evaluate # isort: skip
Expand Down
Loading

0 comments on commit 0c1f989

Please sign in to comment.