This is page 3 of 17. Use http://codebase.md/stanfordnlp/dspy?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── .internal_dspyai │ │ ├── internals │ │ │ ├── build-and-release.md │ │ │ └── release-checklist.md │ │ └── pyproject.toml │ ├── .tmp │ │ └── .generated-actions │ │ └── run-pypi-publish-in-docker-container │ │ └── action.yml │ ├── ISSUE_TEMPLATE │ │ ├── bug_report.yml │ │ └── feature_request.yml │ ├── PULL_REQUEST_TEMPLATE │ │ └── pull_request_template.md │ ├── workflow_scripts │ │ └── install_testpypi_pkg.sh │ └── workflows │ ├── build_and_release.yml │ ├── build_utils │ │ └── test_version.py │ ├── docs-push.yml │ ├── precommits_check.yml │ └── run_tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── docs │ ├── .gitignore │ ├── docs │ │ ├── api │ │ │ ├── adapters │ │ │ │ ├── Adapter.md │ │ │ │ ├── ChatAdapter.md │ │ │ │ ├── JSONAdapter.md │ │ │ │ └── TwoStepAdapter.md │ │ │ ├── evaluation │ │ │ │ ├── answer_exact_match.md │ │ │ │ ├── answer_passage_match.md │ │ │ │ ├── CompleteAndGrounded.md │ │ │ │ ├── Evaluate.md │ │ │ │ ├── EvaluationResult.md │ │ │ │ └── SemanticF1.md │ │ │ ├── experimental │ │ │ │ ├── Citations.md │ │ │ │ └── Document.md │ │ │ ├── index.md │ │ │ ├── models │ │ │ │ ├── Embedder.md │ │ │ │ └── LM.md │ │ │ ├── modules │ │ │ │ ├── BestOfN.md │ │ │ │ ├── ChainOfThought.md │ │ │ │ ├── CodeAct.md │ │ │ │ ├── Module.md │ │ │ │ ├── MultiChainComparison.md │ │ │ │ ├── Parallel.md │ │ │ │ ├── Predict.md │ │ │ │ ├── ProgramOfThought.md │ │ │ │ ├── ReAct.md │ │ │ │ └── Refine.md │ │ │ ├── optimizers │ │ │ │ ├── BetterTogether.md │ │ │ │ ├── BootstrapFewShot.md │ │ │ │ ├── BootstrapFewShotWithRandomSearch.md │ │ │ │ ├── BootstrapFinetune.md │ │ │ │ ├── BootstrapRS.md │ │ │ │ ├── COPRO.md │ │ │ │ ├── Ensemble.md │ │ │ │ ├── GEPA │ │ │ │ │ ├── GEPA_Advanced.md │ │ │ │ │ └── overview.md │ │ │ │ ├── InferRules.md │ │ │ │ ├── KNN.md │ │ │ │ ├── KNNFewShot.md │ │ │ │ ├── LabeledFewShot.md │ │ 
│ │ ├── MIPROv2.md │ │ │ │ └── SIMBA.md │ │ │ ├── primitives │ │ │ │ ├── Audio.md │ │ │ │ ├── Code.md │ │ │ │ ├── Example.md │ │ │ │ ├── History.md │ │ │ │ ├── Image.md │ │ │ │ ├── Prediction.md │ │ │ │ ├── Tool.md │ │ │ │ └── ToolCalls.md │ │ │ ├── signatures │ │ │ │ ├── InputField.md │ │ │ │ ├── OutputField.md │ │ │ │ └── Signature.md │ │ │ ├── tools │ │ │ │ ├── ColBERTv2.md │ │ │ │ ├── Embeddings.md │ │ │ │ └── PythonInterpreter.md │ │ │ └── utils │ │ │ ├── asyncify.md │ │ │ ├── configure_cache.md │ │ │ ├── disable_litellm_logging.md │ │ │ ├── disable_logging.md │ │ │ ├── enable_litellm_logging.md │ │ │ ├── enable_logging.md │ │ │ ├── inspect_history.md │ │ │ ├── load.md │ │ │ ├── StatusMessage.md │ │ │ ├── StatusMessageProvider.md │ │ │ ├── streamify.md │ │ │ └── StreamListener.md │ │ ├── cheatsheet.md │ │ ├── community │ │ │ ├── community-resources.md │ │ │ ├── how-to-contribute.md │ │ │ └── use-cases.md │ │ ├── deep-dive │ │ │ └── data-handling │ │ │ ├── built-in-datasets.md │ │ │ ├── examples.md │ │ │ ├── img │ │ │ │ └── data-loading.png │ │ │ └── loading-custom-data.md │ │ ├── faqs.md │ │ ├── index.md │ │ ├── js │ │ │ └── runllm-widget.js │ │ ├── learn │ │ │ ├── evaluation │ │ │ │ ├── data.md │ │ │ │ ├── metrics.md │ │ │ │ └── overview.md │ │ │ ├── figures │ │ │ │ ├── native_tool_call.png │ │ │ │ └── teleprompter-classes.png │ │ │ ├── index.md │ │ │ ├── optimization │ │ │ │ ├── optimizers.md │ │ │ │ └── overview.md │ │ │ └── programming │ │ │ ├── 7-assertions.md │ │ │ ├── adapters.md │ │ │ ├── language_models.md │ │ │ ├── mcp.md │ │ │ ├── modules.md │ │ │ ├── overview.md │ │ │ ├── signatures.md │ │ │ └── tools.md │ │ ├── production │ │ │ └── index.md │ │ ├── roadmap.md │ │ ├── static │ │ │ ├── .nojekyll │ │ │ └── img │ │ │ ├── dspy_logo.png │ │ │ ├── logo.png │ │ │ ├── mlflow-tracing-rag.png │ │ │ ├── modular.png │ │ │ ├── optimize.png │ │ │ ├── undraw_docusaurus_mountain.svg │ │ │ ├── undraw_docusaurus_react.svg │ │ │ ├── undraw_docusaurus_tree.svg │ │ │ 
└── universal_compatibility.png │ │ ├── stylesheets │ │ │ └── extra.css │ │ └── tutorials │ │ ├── agents │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-agent.png │ │ ├── ai_text_game │ │ │ └── index.md │ │ ├── async │ │ │ └── index.md │ │ ├── audio │ │ │ └── index.ipynb │ │ ├── build_ai_program │ │ │ └── index.md │ │ ├── cache │ │ │ └── index.md │ │ ├── classification │ │ │ └── index.md │ │ ├── classification_finetuning │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-classification.png │ │ ├── conversation_history │ │ │ └── index.md │ │ ├── core_development │ │ │ └── index.md │ │ ├── custom_module │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-custom-module.png │ │ ├── customer_service_agent │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-customer-service-agent.png │ │ ├── deployment │ │ │ ├── dspy_mlflow_ui.png │ │ │ └── index.md │ │ ├── email_extraction │ │ │ ├── index.md │ │ │ └── mlflow-tracing-email-extraction.png │ │ ├── entity_extraction │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-entity-extraction.png │ │ ├── games │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-agent.png │ │ ├── gepa_ai_program │ │ │ └── index.md │ │ ├── gepa_aime │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-aime.png │ │ │ └── mlflow-tracking-gepa-aime-optimization.png │ │ ├── gepa_facilitysupportanalyzer │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-support.png │ │ │ └── mlflow-tracking-gepa-support-optimization.png │ │ ├── gepa_papillon │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-papilon.png │ │ │ └── mlflow-tracking-gepa-papilon-optimization.png │ │ ├── image_generation_prompting │ │ │ └── index.ipynb │ │ ├── index.md │ │ ├── llms_txt_generation │ │ │ └── index.md │ │ ├── math │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-math.png │ │ ├── mcp │ │ │ └── index.md │ │ ├── mem0_react_agent │ │ │ └── index.md │ │ ├── multihop_search │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-multi-hop.png │ │ ├── observability │ │ │ ├── index.md │ │ │ ├── 
mlflow_trace_ui_navigation.gif │ │ │ ├── mlflow_trace_ui.png │ │ │ └── mlflow_trace_view.png │ │ ├── optimize_ai_program │ │ │ └── index.md │ │ ├── optimizer_tracking │ │ │ ├── child_run.png │ │ │ ├── experiment.png │ │ │ ├── index.md │ │ │ └── parent_run.png │ │ ├── output_refinement │ │ │ └── best-of-n-and-refine.md │ │ ├── papillon │ │ │ └── index.md │ │ ├── program_of_thought │ │ │ └── index.ipynb │ │ ├── rag │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-rag.png │ │ ├── real_world_examples │ │ │ └── index.md │ │ ├── rl_ai_program │ │ │ └── index.md │ │ ├── rl_multihop │ │ │ └── index.ipynb │ │ ├── rl_papillon │ │ │ └── index.ipynb │ │ ├── sample_code_generation │ │ │ └── index.md │ │ ├── saving │ │ │ └── index.md │ │ ├── streaming │ │ │ └── index.md │ │ ├── tool_use │ │ │ └── index.ipynb │ │ └── yahoo_finance_react │ │ └── index.md │ ├── mkdocs.yml │ ├── overrides │ │ ├── home.html │ │ ├── main.html │ │ └── partials │ │ └── tabs.html │ ├── Pipfile │ ├── Pipfile.lock │ ├── README.md │ ├── requirements.txt │ ├── scripts │ │ ├── generate_api_docs.py │ │ └── generate_api_summary.py │ └── vercel.json ├── dspy │ ├── __init__.py │ ├── __metadata__.py │ ├── adapters │ │ ├── __init__.py │ │ ├── baml_adapter.py │ │ ├── base.py │ │ ├── chat_adapter.py │ │ ├── json_adapter.py │ │ ├── two_step_adapter.py │ │ ├── types │ │ │ ├── __init__.py │ │ │ ├── audio.py │ │ │ ├── base_type.py │ │ │ ├── citation.py │ │ │ ├── code.py │ │ │ ├── document.py │ │ │ ├── history.py │ │ │ ├── image.py │ │ │ └── tool.py │ │ ├── utils.py │ │ └── xml_adapter.py │ ├── clients │ │ ├── __init__.py │ │ ├── base_lm.py │ │ ├── cache.py │ │ ├── databricks.py │ │ ├── embedding.py │ │ ├── lm_local_arbor.py │ │ ├── lm_local.py │ │ ├── lm.py │ │ ├── openai.py │ │ ├── provider.py │ │ └── utils_finetune.py │ ├── datasets │ │ ├── __init__.py │ │ ├── alfworld │ │ │ ├── __init__.py │ │ │ ├── alfworld.py │ │ │ └── base_config.yml │ │ ├── colors.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── gsm8k.py │ │ ├── 
hotpotqa.py │ │ └── math.py │ ├── dsp │ │ ├── __init__.py │ │ ├── colbertv2.py │ │ └── utils │ │ ├── __init__.py │ │ ├── dpr.py │ │ ├── settings.py │ │ └── utils.py │ ├── evaluate │ │ ├── __init__.py │ │ ├── auto_evaluation.py │ │ ├── evaluate.py │ │ └── metrics.py │ ├── experimental │ │ └── __init__.py │ ├── predict │ │ ├── __init__.py │ │ ├── aggregation.py │ │ ├── avatar │ │ │ ├── __init__.py │ │ │ ├── avatar.py │ │ │ ├── models.py │ │ │ └── signatures.py │ │ ├── best_of_n.py │ │ ├── chain_of_thought.py │ │ ├── code_act.py │ │ ├── knn.py │ │ ├── multi_chain_comparison.py │ │ ├── parallel.py │ │ ├── parameter.py │ │ ├── predict.py │ │ ├── program_of_thought.py │ │ ├── react.py │ │ ├── refine.py │ │ └── retry.py │ ├── primitives │ │ ├── __init__.py │ │ ├── base_module.py │ │ ├── example.py │ │ ├── module.py │ │ ├── prediction.py │ │ ├── python_interpreter.py │ │ └── runner.js │ ├── propose │ │ ├── __init__.py │ │ ├── dataset_summary_generator.py │ │ ├── grounded_proposer.py │ │ ├── propose_base.py │ │ └── utils.py │ ├── retrievers │ │ ├── __init__.py │ │ ├── databricks_rm.py │ │ ├── embeddings.py │ │ ├── retrieve.py │ │ └── weaviate_rm.py │ ├── signatures │ │ ├── __init__.py │ │ ├── field.py │ │ ├── signature.py │ │ └── utils.py │ ├── streaming │ │ ├── __init__.py │ │ ├── messages.py │ │ ├── streamify.py │ │ └── streaming_listener.py │ ├── teleprompt │ │ ├── __init__.py │ │ ├── avatar_optimizer.py │ │ ├── bettertogether.py │ │ ├── bootstrap_finetune.py │ │ ├── bootstrap_trace.py │ │ ├── bootstrap.py │ │ ├── copro_optimizer.py │ │ ├── ensemble.py │ │ ├── gepa │ │ │ ├── __init__.py │ │ │ ├── gepa_utils.py │ │ │ ├── gepa.py │ │ │ └── instruction_proposal.py │ │ ├── grpo.py │ │ ├── infer_rules.py │ │ ├── knn_fewshot.py │ │ ├── mipro_optimizer_v2.py │ │ ├── random_search.py │ │ ├── signature_opt.py │ │ ├── simba_utils.py │ │ ├── simba.py │ │ ├── teleprompt_optuna.py │ │ ├── teleprompt.py │ │ ├── utils.py │ │ └── vanilla.py │ └── utils │ ├── __init__.py │ ├── 
annotation.py │ ├── asyncify.py │ ├── caching.py │ ├── callback.py │ ├── dummies.py │ ├── exceptions.py │ ├── hasher.py │ ├── inspect_history.py │ ├── langchain_tool.py │ ├── logging_utils.py │ ├── mcp.py │ ├── parallelizer.py │ ├── saving.py │ ├── syncify.py │ ├── unbatchify.py │ └── usage_tracker.py ├── LICENSE ├── pyproject.toml ├── README.md ├── tests │ ├── __init__.py │ ├── adapters │ │ ├── test_adapter_utils.py │ │ ├── test_baml_adapter.py │ │ ├── test_base_type.py │ │ ├── test_chat_adapter.py │ │ ├── test_citation.py │ │ ├── test_code.py │ │ ├── test_document.py │ │ ├── test_json_adapter.py │ │ ├── test_tool.py │ │ ├── test_two_step_adapter.py │ │ └── test_xml_adapter.py │ ├── callback │ │ └── test_callback.py │ ├── clients │ │ ├── test_cache.py │ │ ├── test_databricks.py │ │ ├── test_embedding.py │ │ ├── test_inspect_global_history.py │ │ └── test_lm.py │ ├── conftest.py │ ├── datasets │ │ └── test_dataset.py │ ├── docs │ │ └── test_mkdocs_links.py │ ├── evaluate │ │ ├── test_evaluate.py │ │ └── test_metrics.py │ ├── examples │ │ └── test_baleen.py │ ├── metadata │ │ └── test_metadata.py │ ├── predict │ │ ├── test_aggregation.py │ │ ├── test_best_of_n.py │ │ ├── test_chain_of_thought.py │ │ ├── test_code_act.py │ │ ├── test_knn.py │ │ ├── test_multi_chain_comparison.py │ │ ├── test_parallel.py │ │ ├── test_predict.py │ │ ├── test_program_of_thought.py │ │ ├── test_react.py │ │ ├── test_refine.py │ │ └── test_retry.py │ ├── primitives │ │ ├── resources │ │ │ └── saved_program.json │ │ ├── test_base_module.py │ │ ├── test_example.py │ │ ├── test_module.py │ │ └── test_python_interpreter.py │ ├── propose │ │ └── test_grounded_proposer.py │ ├── README.md │ ├── reliability │ │ ├── __init__.py │ │ ├── complex_types │ │ │ └── generated │ │ │ ├── test_many_types_1 │ │ │ │ ├── inputs │ │ │ │ │ ├── input1.json │ │ │ │ │ └── input2.json │ │ │ │ ├── program.py │ │ │ │ └── schema.json │ │ │ ├── test_nesting_1 │ │ │ │ ├── inputs │ │ │ │ │ ├── input1.json │ │ │ │ │ └── 
input2.json │ │ │ │ ├── program.py │ │ │ │ └── schema.json │ │ │ └── test_nesting_2 │ │ │ ├── inputs │ │ │ │ └── input1.json │ │ │ ├── program.py │ │ │ └── schema.json │ │ ├── conftest.py │ │ ├── generate │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ └── utils.py │ │ ├── input_formats │ │ │ └── generated │ │ │ └── test_markdown_1 │ │ │ ├── inputs │ │ │ │ ├── input1.json │ │ │ │ └── input2.json │ │ │ ├── program.py │ │ │ └── schema.json │ │ ├── README.md │ │ ├── reliability_conf.yaml │ │ ├── test_generated.py │ │ ├── test_pydantic_models.py │ │ └── utils.py │ ├── retrievers │ │ └── test_embeddings.py │ ├── signatures │ │ ├── test_adapter_image.py │ │ ├── test_custom_types.py │ │ └── test_signature.py │ ├── streaming │ │ └── test_streaming.py │ ├── teleprompt │ │ ├── gepa_dummy_lm_custom_component_selector_custom_instruction_proposer.json │ │ ├── gepa_dummy_lm.json │ │ ├── test_bootstrap_finetune.py │ │ ├── test_bootstrap_trace.py │ │ ├── test_bootstrap.py │ │ ├── test_copro_optimizer.py │ │ ├── test_ensemble.py │ │ ├── test_finetune.py │ │ ├── test_gepa_instruction_proposer.py │ │ ├── test_gepa.py │ │ ├── test_grpo.py │ │ ├── test_knn_fewshot.py │ │ ├── test_random_search.py │ │ ├── test_teleprompt.py │ │ └── test_utils.py │ ├── test_utils │ │ ├── __init__.py │ │ └── server │ │ ├── __init__.py │ │ ├── litellm_server_config.yaml │ │ └── litellm_server.py │ └── utils │ ├── __init__.py │ ├── resources │ │ └── mcp_server.py │ ├── test_annotation.py │ ├── test_asyncify.py │ ├── test_exceptions.py │ ├── test_langchain_tool.py │ ├── test_mcp.py │ ├── test_parallelizer.py │ ├── test_saving.py │ ├── test_settings.py │ ├── test_syncify.py │ ├── test_unbatchify.py │ └── test_usage_tracker.py └── uv.lock ``` # Files -------------------------------------------------------------------------------- /dspy/predict/best_of_n.py: -------------------------------------------------------------------------------- ```python 1 | from typing import Callable 2 | 3 | import dspy 4 | from 
dspy.predict.predict import Module, Prediction 5 | 6 | 7 | class BestOfN(Module): 8 | def __init__( 9 | self, 10 | module: Module, 11 | N: int, # noqa: N803 12 | reward_fn: Callable[[dict, Prediction], float], 13 | threshold: float, 14 | fail_count: int | None = None, 15 | ): 16 | """ 17 | Runs a module up to `N` times with different rollout IDs at `temperature=1.0` and 18 | returns the best prediction out of `N` attempts or the first prediction that passes the 19 | `threshold`. 20 | 21 | Args: 22 | module (Module): The module to run. 23 | N (int): The number of times to run the module. 24 | reward_fn (Callable[[dict, Prediction], float]): The reward function which takes in the args passed to the module, the resulting prediction, and returns a scalar reward. 25 | threshold (float): The threshold for the reward function. 26 | fail_count (Optional[int], optional): The number of times the module can fail before raising an error. Defaults to N if not provided. 27 | 28 | Example: 29 | ```python 30 | import dspy 31 | 32 | dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) 33 | 34 | # Define a QA module with chain of thought 35 | qa = dspy.ChainOfThought("question -> answer") 36 | 37 | # Define a reward function that checks for one-word answers 38 | def one_word_answer(args, pred): 39 | return 1.0 if len(pred.answer.split()) == 1 else 0.0 40 | 41 | # Create a refined module that tries up to 3 times 42 | best_of_3 = dspy.BestOfN(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0) 43 | 44 | # Use the refined module 45 | result = best_of_3(question="What is the capital of Belgium?").answer 46 | # Returns: Brussels 47 | ``` 48 | """ 49 | self.module = module 50 | self.reward_fn = lambda *args: reward_fn(*args) # to prevent this from becoming a parameter 51 | self.threshold = threshold 52 | self.N = N 53 | self.fail_count = fail_count or N # default to N if fail_count is not provided 54 | 55 | def forward(self, **kwargs): 56 | lm = self.module.get_lm() or 
dspy.settings.lm 57 | start = lm.kwargs.get("rollout_id", 0) 58 | rollout_ids = [start + i for i in range(self.N)] 59 | best_pred, best_trace, best_reward = None, None, -float("inf") 60 | 61 | for idx, rid in enumerate(rollout_ids): 62 | lm_ = lm.copy(rollout_id=rid, temperature=1.0) 63 | mod = self.module.deepcopy() 64 | mod.set_lm(lm_) 65 | 66 | try: 67 | with dspy.context(trace=[]): 68 | pred = mod(**kwargs) 69 | trace = dspy.settings.trace.copy() 70 | 71 | # NOTE: Not including the trace of reward_fn. 72 | reward = self.reward_fn(kwargs, pred) 73 | 74 | if reward > best_reward: 75 | best_reward, best_pred, best_trace = reward, pred, trace 76 | 77 | if reward >= self.threshold: 78 | break 79 | 80 | except Exception as e: 81 | print(f"BestOfN: Attempt {idx + 1} failed with rollout id {rid}: {e}") 82 | if idx > self.fail_count: 83 | raise e 84 | self.fail_count -= 1 85 | 86 | if best_trace: 87 | dspy.settings.trace.extend(best_trace) 88 | return best_pred 89 | ``` -------------------------------------------------------------------------------- /dspy/clients/provider.py: -------------------------------------------------------------------------------- ```python 1 | from abc import abstractmethod 2 | from concurrent.futures import Future 3 | from threading import Thread 4 | from typing import TYPE_CHECKING, Any 5 | 6 | from dspy.clients.utils_finetune import MultiGPUConfig, TrainDataFormat 7 | 8 | if TYPE_CHECKING: 9 | from dspy.clients.lm import LM 10 | 11 | 12 | class TrainingJob(Future): 13 | def __init__( 14 | self, 15 | thread: Thread | None = None, 16 | model: str | None = None, 17 | train_data: list[dict[str, Any]] | None = None, 18 | train_data_format: TrainDataFormat | None = None, 19 | train_kwargs: dict[str, Any] | None = None, 20 | ): 21 | self.thread = thread 22 | self.model = model 23 | self.train_data = train_data 24 | self.train_data_format = train_data_format 25 | self.train_kwargs = train_kwargs or {} 26 | super().__init__() 27 | 28 | # Subclasses 
should override the cancel method to cancel the job; then call 29 | # the super's cancel method so that the future can be cancelled. 30 | def cancel(self): 31 | super().cancel() 32 | 33 | @abstractmethod 34 | def status(self): 35 | raise NotImplementedError 36 | 37 | 38 | class ReinforceJob: 39 | def __init__(self, lm: "LM", train_kwargs: dict[str, Any] | None = None, gpu_config: MultiGPUConfig = MultiGPUConfig(num_inference_gpus=1, num_training_gpus=1)): 40 | self.lm = lm 41 | self.train_kwargs = train_kwargs or {} 42 | self.gpu_config = gpu_config 43 | self.checkpoints = {} 44 | self.last_checkpoint = None 45 | self.gpu_config = gpu_config 46 | 47 | 48 | @abstractmethod 49 | def initialize(self): 50 | raise NotImplementedError 51 | 52 | @abstractmethod 53 | def step(self, train_data: list[dict[str, Any]], train_data_format: TrainDataFormat | str | None = None): 54 | raise NotImplementedError 55 | 56 | @abstractmethod 57 | def terminate(self): 58 | raise NotImplementedError 59 | 60 | @abstractmethod 61 | def update_model(self): 62 | raise NotImplementedError 63 | 64 | @abstractmethod 65 | def save_checkpoint(self, checkpoint_name: str): 66 | raise NotImplementedError 67 | 68 | def cancel(self): 69 | raise NotImplementedError 70 | 71 | def status(self): 72 | raise NotImplementedError 73 | 74 | 75 | class Provider: 76 | def __init__(self): 77 | self.finetunable = False 78 | self.reinforceable = False 79 | self.TrainingJob = TrainingJob 80 | self.ReinforceJob = ReinforceJob 81 | 82 | @staticmethod 83 | def is_provider_model(model: str) -> bool: 84 | # Subclasses should actually check whether a model is supported if they 85 | # want to have the model provider auto-discovered. 86 | return False 87 | 88 | @staticmethod 89 | def launch(lm: "LM", launch_kwargs: dict[str, Any] | None = None): 90 | # Note that "launch" and "kill" methods might be called even if there 91 | # is a launched LM or no launched LM to kill. These methods should be 92 | # resilient to such cases. 
93 | pass 94 | 95 | @staticmethod 96 | def kill(lm: "LM", launch_kwargs: dict[str, Any] | None = None): 97 | # We assume that LM.launch_kwargs dictionary will contain the necessary 98 | # information for a provider to launch and/or kill an LM. This is the 99 | # reason why the argument here is named launch_kwargs and not 100 | # kill_kwargs. 101 | pass 102 | 103 | @staticmethod 104 | def finetune( 105 | job: TrainingJob, 106 | model: str, 107 | train_data: list[dict[str, Any]], 108 | train_data_format: TrainDataFormat | str | None, 109 | train_kwargs: dict[str, Any] | None = None, 110 | ) -> str: 111 | raise NotImplementedError 112 | ``` -------------------------------------------------------------------------------- /dspy/datasets/colors.py: -------------------------------------------------------------------------------- ```python 1 | import random 2 | 3 | from dspy.datasets.dataset import Dataset 4 | 5 | ### A bunch of colors, originally from matplotlib 6 | all_colors = [ 7 | "alice blue", 8 | "dodger blue", 9 | "light sky blue", 10 | "deep sky blue", 11 | "sky blue", 12 | "steel blue", 13 | "light steel blue", 14 | "medium blue", 15 | "navy blue", 16 | "blue", 17 | "royal blue", 18 | "cadet blue", 19 | "cornflower blue", 20 | "medium slate blue", 21 | "slate blue", 22 | "dark slate blue", 23 | "powder blue", 24 | "turquoise", 25 | "dark turquoise", 26 | "medium turquoise", 27 | "pale turquoise", 28 | "light sea green", 29 | "medium sea green", 30 | "sea green", 31 | "forest green", 32 | "green yellow", 33 | "lime green", 34 | "dark green", 35 | "green", 36 | "lime", 37 | "chartreuse", 38 | "lawn green", 39 | "yellow green", 40 | "olive green", 41 | "dark olive green", 42 | "medium spring green", 43 | "spring green", 44 | "medium aquamarine", 45 | "aquamarine", 46 | "aqua", 47 | "cyan", 48 | "dark cyan", 49 | "teal", 50 | "medium orchid", 51 | "dark orchid", 52 | "orchid", 53 | "blue violet", 54 | "violet", 55 | "dark violet", 56 | "plum", 57 | "thistle", 58 | 
"magenta", 59 | "fuchsia", 60 | "dark magenta", 61 | "medium purple", 62 | "purple", 63 | "rebecca purple", 64 | "dark red", 65 | "fire brick", 66 | "indian red", 67 | "light coral", 68 | "dark salmon", 69 | "light salmon", 70 | "salmon", 71 | "red", 72 | "crimson", 73 | "tomato", 74 | "coral", 75 | "orange red", 76 | "dark orange", 77 | "orange", 78 | "yellow", 79 | "gold", 80 | "light goldenrod yellow", 81 | "pale goldenrod", 82 | "goldenrod", 83 | "dark goldenrod", 84 | "beige", 85 | "moccasin", 86 | "blanched almond", 87 | "navajo white", 88 | "antique white", 89 | "bisque", 90 | "burlywood", 91 | "dark khaki", 92 | "khaki", 93 | "tan", 94 | "wheat", 95 | "snow", 96 | "floral white", 97 | "old lace", 98 | "ivory", 99 | "linen", 100 | "seashell", 101 | "honeydew", 102 | "mint cream", 103 | "azure", 104 | "lavender", 105 | "ghost white", 106 | "white smoke", 107 | "gainsboro", 108 | "light gray", 109 | "silver", 110 | "dark gray", 111 | "gray", 112 | "dim gray", 113 | "slate gray", 114 | "light slate gray", 115 | "dark slate gray", 116 | "black", 117 | "medium violet red", 118 | "pale violet red", 119 | "deep pink", 120 | "hot pink", 121 | "light pink", 122 | "pink", 123 | "peach puff", 124 | "rosy brown", 125 | "saddle brown", 126 | "sandy brown", 127 | "chocolate", 128 | "peru", 129 | "sienna", 130 | "brown", 131 | "maroon", 132 | "white", 133 | "misty rose", 134 | "lavender blush", 135 | "papaya whip", 136 | "lemon chiffon", 137 | "light yellow", 138 | "corn silk", 139 | "pale green", 140 | "light green", 141 | "olive drab", 142 | "olive", 143 | "dark sea green", 144 | ] 145 | 146 | 147 | class Colors(Dataset): 148 | def __init__(self, sort_by_suffix=True, *args, **kwargs) -> None: 149 | super().__init__(*args, **kwargs) 150 | 151 | self.sort_by_suffix = sort_by_suffix 152 | colors = self.sorted_by_suffix(all_colors) 153 | 154 | train_size = int( 155 | len(colors) * 0.6 156 | ) # chosen to ensure that similar colors aren't repeated between train and dev 157 | 
train_colors, dev_colors = colors[:train_size], colors[train_size:] 158 | 159 | self._train = [{"color": color} for color in train_colors] 160 | self._dev = [{"color": color} for color in dev_colors] 161 | 162 | random.Random(0).shuffle(self._train) 163 | random.Random(0).shuffle(self._dev) 164 | 165 | def sorted_by_suffix(self, colors): 166 | if not self.sort_by_suffix: 167 | return colors 168 | 169 | if isinstance(colors[0], str): 170 | sorted_colors = sorted(colors, key=lambda x: x[::-1]) 171 | else: 172 | sorted_colors = sorted(colors, key=lambda x: x["color"][::-1]) 173 | 174 | return sorted_colors 175 | ``` -------------------------------------------------------------------------------- /dspy/utils/unbatchify.py: -------------------------------------------------------------------------------- ```python 1 | import queue 2 | import threading 3 | import time 4 | from concurrent.futures import Future 5 | from typing import Any, Callable 6 | 7 | 8 | class Unbatchify: 9 | def __init__( 10 | self, 11 | batch_fn: Callable[[list[Any]], list[Any]], 12 | max_batch_size: int = 32, 13 | max_wait_time: float = 0.1 14 | ): 15 | """ 16 | Initializes the Unbatchify. 17 | 18 | Args: 19 | batch_fn: The batch-processing function that accepts a list of inputs and returns a list of outputs. 20 | max_batch_size: The maximum number of items to include in a batch. 21 | max_wait_time: The maximum time (in seconds) to wait for batch to fill before processing. 
22 | """ 23 | 24 | self.batch_fn = batch_fn 25 | self.max_batch_size = max_batch_size 26 | self.max_wait_time = max_wait_time 27 | self.input_queue = queue.Queue() 28 | self.stop_event = threading.Event() 29 | self.worker_thread = threading.Thread(target=self._worker) 30 | self.worker_thread.daemon = True # Ensures thread exits when main program exits 31 | self.worker_thread.start() 32 | 33 | def __call__(self, input_item: Any) -> Any: 34 | """ 35 | Thread-safe function that accepts a single input and returns the corresponding output. 36 | 37 | Args: 38 | input_item: The single input item to process. 39 | 40 | Returns: 41 | The output corresponding to the input_item after processing through batch_fn. 42 | """ 43 | future = Future() 44 | self.input_queue.put((input_item, future)) 45 | try: 46 | result = future.result() 47 | except Exception as e: 48 | raise e 49 | return result 50 | 51 | def _worker(self): 52 | """ 53 | Worker thread that batches inputs and processes them using batch_fn. 
54 | """ 55 | while not self.stop_event.is_set(): 56 | batch = [] 57 | futures = [] 58 | start_time = time.time() 59 | while len(batch) < self.max_batch_size and (time.time() - start_time) < self.max_wait_time: 60 | try: 61 | input_item, future = self.input_queue.get(timeout=self.max_wait_time) 62 | batch.append(input_item) 63 | futures.append(future) 64 | except queue.Empty: 65 | break 66 | 67 | if batch: 68 | try: 69 | outputs = self.batch_fn(batch) 70 | for output, future in zip(outputs, futures, strict=False): 71 | future.set_result(output) 72 | except Exception as e: 73 | for future in futures: 74 | future.set_exception(e) 75 | else: 76 | time.sleep(0.01) 77 | 78 | # Clean up remaining items when stopping 79 | while True: 80 | try: 81 | _, future = self.input_queue.get_nowait() 82 | future.set_exception(RuntimeError("Unbatchify is closed")) 83 | except queue.Empty: 84 | break 85 | 86 | print("Worker thread has been terminated.") 87 | 88 | def close(self): 89 | """ 90 | Stops the worker thread and cleans up resources. 91 | """ 92 | if not self.stop_event.is_set(): 93 | self.stop_event.set() 94 | self.worker_thread.join() 95 | 96 | def __enter__(self): 97 | """ 98 | Enables use as a context manager. 99 | """ 100 | return self 101 | 102 | def __exit__(self, exc_type, exc_value, traceback): 103 | """ 104 | Ensures resources are cleaned up when exiting context. 105 | """ 106 | self.close() 107 | 108 | def __del__(self): 109 | """ 110 | Ensures the worker thread is terminated when the object is garbage collected. 111 | """ 112 | self.close() 113 | ``` -------------------------------------------------------------------------------- /dspy/signatures/field.py: -------------------------------------------------------------------------------- ```python 1 | import pydantic 2 | 3 | # The following arguments can be used in DSPy InputField and OutputField in addition 4 | # to the standard pydantic.Field arguments. 
We just hope pydantic doesn't add these, 5 | # as it would give a name clash. 6 | DSPY_FIELD_ARG_NAMES = ["desc", "prefix", "format", "parser", "__dspy_field_type"] 7 | 8 | PYDANTIC_CONSTRAINT_MAP = { 9 | "gt": "greater than: ", 10 | "ge": "greater than or equal to: ", 11 | "lt": "less than: ", 12 | "le": "less than or equal to: ", 13 | "min_length": "minimum length: ", 14 | "max_length": "maximum length: ", 15 | "multiple_of": "a multiple of the given number: ", 16 | "allow_inf_nan": "allow 'inf', '-inf', 'nan' values: ", 17 | } 18 | 19 | 20 | def move_kwargs(**kwargs): 21 | # Pydantic doesn't allow arbitrary arguments to be given to fields, 22 | # but asks that 23 | # > any extra data you want to add to the JSON schema should be passed 24 | # > as a dictionary to the json_schema_extra keyword argument. 25 | # See: https://docs.pydantic.dev/2.6/migration/#changes-to-pydanticfield 26 | pydantic_kwargs = {} 27 | json_schema_extra = {} 28 | for k, v in kwargs.items(): 29 | if k in DSPY_FIELD_ARG_NAMES: 30 | json_schema_extra[k] = v 31 | else: 32 | pydantic_kwargs[k] = v 33 | # Also copy over the pydantic "description" if no dspy "desc" is given. 
34 | if "description" in kwargs and "desc" not in json_schema_extra: 35 | json_schema_extra["desc"] = kwargs["description"] 36 | constraints = _translate_pydantic_field_constraints(**kwargs) 37 | if constraints: 38 | json_schema_extra["constraints"] = constraints 39 | pydantic_kwargs["json_schema_extra"] = json_schema_extra 40 | return pydantic_kwargs 41 | 42 | 43 | def _translate_pydantic_field_constraints(**kwargs): 44 | """Extracts Pydantic constraints and translates them into human-readable format.""" 45 | 46 | constraints = [] 47 | for key, value in kwargs.items(): 48 | if key in PYDANTIC_CONSTRAINT_MAP: 49 | constraints.append(f"{PYDANTIC_CONSTRAINT_MAP[key]}{value}") 50 | 51 | return ", ".join(constraints) 52 | 53 | 54 | def InputField(**kwargs): # noqa: N802 55 | return pydantic.Field(**move_kwargs(**kwargs, __dspy_field_type="input")) 56 | 57 | 58 | def OutputField(**kwargs): # noqa: N802 59 | return pydantic.Field(**move_kwargs(**kwargs, __dspy_field_type="output")) 60 | 61 | 62 | def new_to_old_field(field): 63 | return (OldInputField if field.json_schema_extra["__dspy_field_type"] == "input" else OldOutputField)( 64 | prefix=field.json_schema_extra["prefix"], 65 | desc=field.json_schema_extra["desc"], 66 | format=field.json_schema_extra.get("format"), 67 | ) 68 | 69 | 70 | class OldField: 71 | """A more ergonomic datatype that infers prefix and desc if omitted.""" 72 | 73 | def __init__(self, *, prefix=None, desc=None, input, format=None): 74 | self.prefix = prefix # This can be None initially and set later 75 | self.desc = desc 76 | self.format = format 77 | 78 | def finalize(self, key, inferred_prefix): 79 | """Set the prefix if it's not provided explicitly.""" 80 | if self.prefix is None: 81 | self.prefix = inferred_prefix + ":" 82 | 83 | if self.desc is None: 84 | self.desc = f"${{{key}}}" 85 | 86 | def __repr__(self): 87 | return f"{self.__class__.__name__}(prefix={self.prefix}, desc={self.desc})" 88 | 89 | def __eq__(self, __value: object) -> 
bool: 90 | return self.__dict__ == __value.__dict__ 91 | 92 | 93 | class OldInputField(OldField): 94 | def __init__(self, *, prefix=None, desc=None, format=None): 95 | super().__init__(prefix=prefix, desc=desc, input=True, format=format) 96 | 97 | 98 | class OldOutputField(OldField): 99 | def __init__(self, *, prefix=None, desc=None, format=None): 100 | super().__init__(prefix=prefix, desc=desc, input=False, format=format) 101 | ``` -------------------------------------------------------------------------------- /dspy/clients/__init__.py: -------------------------------------------------------------------------------- ```python 1 | import logging 2 | import os 3 | from pathlib import Path 4 | 5 | import litellm 6 | 7 | from dspy.clients.base_lm import BaseLM, inspect_history 8 | from dspy.clients.cache import Cache 9 | from dspy.clients.embedding import Embedder 10 | from dspy.clients.lm import LM 11 | from dspy.clients.provider import Provider, TrainingJob 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | DISK_CACHE_DIR = os.environ.get("DSPY_CACHEDIR") or os.path.join(Path.home(), ".dspy_cache") 16 | DISK_CACHE_LIMIT = int(os.environ.get("DSPY_CACHE_LIMIT", 3e10)) # 30 GB default 17 | def configure_cache( 18 | enable_disk_cache: bool | None = True, 19 | enable_memory_cache: bool | None = True, 20 | disk_cache_dir: str | None = DISK_CACHE_DIR, 21 | disk_size_limit_bytes: int | None = DISK_CACHE_LIMIT, 22 | memory_max_entries: int | None = 1000000, 23 | ): 24 | """Configure the cache for DSPy. 25 | 26 | Args: 27 | enable_disk_cache: Whether to enable on-disk cache. 28 | enable_memory_cache: Whether to enable in-memory cache. 29 | disk_cache_dir: The directory to store the on-disk cache. 30 | disk_size_limit_bytes: The size limit of the on-disk cache. 31 | memory_max_entries: The maximum number of entries in the in-memory cache. 
def _get_dspy_cache():
    """Create the default DSPy cache, degrading to memory-only when disk setup fails.

    The cache directory comes from ``DSPY_CACHEDIR`` (falling back to ``~/.dspy_cache``)
    and the disk size limit from ``DSPY_CACHE_LIMIT`` (falling back to 3e10 bytes).

    Returns:
        A ``Cache`` with the in-memory layer enabled, and the disk layer enabled unless
        constructing it raised an exception.
    """
    cache_dir = os.environ.get("DSPY_CACHEDIR") or os.path.join(Path.home(), ".dspy_cache")
    size_limit = int(os.environ.get("DSPY_CACHE_LIMIT", 3e10))

    def build(use_disk):
        # Both attempts share every setting except whether the disk layer is enabled.
        return Cache(
            enable_disk_cache=use_disk,
            enable_memory_cache=True,
            disk_cache_dir=cache_dir,
            disk_size_limit_bytes=size_limit,
            memory_max_entries=1000000,
        )

    try:
        return build(True)
    except Exception as e:
        # If cache creation fails (e.g., in AWS Lambda), create a memory-only cache
        logger.warning("Failed to initialize disk cache, falling back to memory-only cache: %s", e)
        return build(False)
84 | from litellm._logging import verbose_logger 85 | 86 | numeric_logging_level = getattr(logging, level) 87 | 88 | verbose_logger.setLevel(numeric_logging_level) 89 | for h in verbose_logger.handlers: 90 | h.setLevel(numeric_logging_level) 91 | 92 | 93 | def enable_litellm_logging(): 94 | litellm.suppress_debug_info = False 95 | configure_litellm_logging("DEBUG") 96 | 97 | 98 | def disable_litellm_logging(): 99 | litellm.suppress_debug_info = True 100 | configure_litellm_logging("ERROR") 101 | 102 | 103 | # By default, we disable LiteLLM logging for clean logging 104 | disable_litellm_logging() 105 | 106 | __all__ = [ 107 | "BaseLM", 108 | "LM", 109 | "Provider", 110 | "TrainingJob", 111 | "inspect_history", 112 | "Embedder", 113 | "enable_litellm_logging", 114 | "disable_litellm_logging", 115 | "configure_cache", 116 | ] 117 | ``` -------------------------------------------------------------------------------- /tests/adapters/test_adapter_utils.py: -------------------------------------------------------------------------------- ```python 1 | # ruff: noqa: UP007 2 | 3 | from typing import Literal, Optional, Union 4 | 5 | import pytest 6 | from pydantic import BaseModel 7 | 8 | from dspy.adapters.utils import parse_value 9 | 10 | 11 | class Profile(BaseModel): 12 | name: str 13 | age: int 14 | 15 | 16 | def test_parse_value_str_annotation(): 17 | # Test basic string conversion 18 | assert parse_value(123, str) == "123" 19 | assert parse_value(True, str) == "True" 20 | assert parse_value("hello", str) == "hello" 21 | assert parse_value(None, str) == "None" 22 | assert parse_value([1, 2, 3], str) == "[1, 2, 3]" 23 | 24 | 25 | def test_parse_value_pydantic_types(): 26 | # Test with pydantic BaseModel - JSON string input 27 | json_str = '{"name": "John", "age": 30}' 28 | result = parse_value(json_str, Profile) 29 | assert isinstance(result, Profile) 30 | assert result.name == "John" 31 | assert result.age == 30 32 | 33 | # Test with pydantic BaseModel - dict input 34 
| dict_input = {"name": "Jane", "age": 25} 35 | result = parse_value(dict_input, Profile) 36 | assert isinstance(result, Profile) 37 | assert result.name == "Jane" 38 | assert result.age == 25 39 | 40 | # Test with invalid pydantic data 41 | with pytest.raises(Exception): 42 | parse_value('{"name": "John"}', Profile) # missing required age field 43 | 44 | 45 | def test_parse_value_basic_types(): 46 | # Test int 47 | assert parse_value("42", int) == 42 48 | assert parse_value(42, int) == 42 49 | 50 | # Test float 51 | assert parse_value("3.14", float) == 3.14 52 | assert parse_value(3.14, float) == 3.14 53 | 54 | # Test bool 55 | assert parse_value("true", bool) is True 56 | assert parse_value(True, bool) is True 57 | assert parse_value("false", bool) is False 58 | 59 | # Test list 60 | assert parse_value("[1, 2, 3]", list[int]) == [1, 2, 3] 61 | assert parse_value([1, 2, 3], list[int]) == [1, 2, 3] 62 | 63 | 64 | def test_parse_value_literal(): 65 | # Test Literal type 66 | assert parse_value("option1", Literal["option1", "option2"]) == "option1" 67 | assert parse_value("option2", Literal["option1", "option2"]) == "option2" 68 | 69 | # Test Literal with quotes and prefixes 70 | assert parse_value("'option1'", Literal["option1", "option2"]) == "option1" 71 | assert parse_value('"option1"', Literal["option1", "option2"]) == "option1" 72 | assert parse_value("Literal[option1]", Literal["option1", "option2"]) == "option1" 73 | assert parse_value("str[option1]", Literal["option1", "option2"]) == "option1" 74 | 75 | # Test invalid literal 76 | with pytest.raises(ValueError): 77 | parse_value("invalid", Literal["option1", "option2"]) 78 | 79 | 80 | def test_parse_value_union(): 81 | # Test Union with None (Optional) 82 | assert parse_value("test", Optional[str]) == "test" 83 | assert parse_value("test", str | None) == "test" 84 | assert parse_value("5", int | None) == 5 85 | assert parse_value(None, Optional[str]) is None 86 | assert parse_value("text with [placeholder]", 
@experimental(version="3.0.4")
class Document(Type):
    """Citable document content that can be handed to a language model.

    Instances carry the text of a source document plus optional metadata, and are meant
    for citation-enabled responses (particularly useful with Anthropic's Citations API).

    Attributes:
        data: The text content of the document
        title: Optional title for the document (used in citations)
        media_type: MIME type of the document content (defaults to "text/plain")
        context: Optional context information about the document

    Example:
        ```python
        import dspy
        from dspy.signatures import Signature
        from dspy.experimental import Document, Citations

        class AnswerWithSources(Signature):
            '''Answer questions using provided documents with citations.'''
            documents: list[Document] = dspy.InputField()
            question: str = dspy.InputField()
            answer: str = dspy.OutputField()
            citations: Citations = dspy.OutputField()

        docs = [
            Document(
                data="The Earth orbits the Sun in an elliptical path.",
                title="Basic Astronomy Facts"
            ),
            Document(
                data="Water boils at 100°C at standard atmospheric pressure.",
                title="Physics Fundamentals",
            )
        ]

        lm = dspy.LM("anthropic/claude-opus-4-1-20250805")
        predictor = dspy.Predict(AnswerWithSources)
        result = predictor(documents=docs, question="What temperature does water boil?", lm=lm)
        print(result.citations)
        ```
    """

    data: str
    title: str | None = None
    media_type: Literal["text/plain", "application/pdf"] = "text/plain"
    context: str | None = None

    def format(self) -> list[dict[str, Any]]:
        """Render the document as a single-element list holding one document block.

        Returns:
            A list with one dict containing the source text, its media type and the
            citations flag, plus ``title`` and ``context`` when they are non-empty.
        """
        block = {
            "type": "document",
            "source": {"type": "text", "media_type": self.media_type, "data": self.data},
            "citations": {"enabled": True},
        }

        # Optional metadata is only emitted when set; title is added before context.
        for optional_key in ("title", "context"):
            value = getattr(self, optional_key)
            if value:
                block[optional_key] = value

        return [block]

    @classmethod
    def description(cls) -> str:
        """Return the prompt-facing description of this type."""
        return (
            "A document containing text content that can be referenced and cited. "
            "Include the full text content and optionally a title for proper referencing."
        )

    @pydantic.model_validator(mode="before")
    @classmethod
    def validate_input(cls, data: Any):
        """Normalize raw input before field validation.

        Existing instances and dicts pass through untouched, a bare string becomes the
        ``data`` field, and anything else is rejected with ``ValueError``.
        """
        if isinstance(data, (cls, dict)):
            return data

        if isinstance(data, str):
            return {"data": data}

        raise ValueError(f"Received invalid value for `Document`: {data}")

    def __str__(self) -> str:
        """Short summary: the quoted title (when present) and the content length."""
        label = f"'{self.title}': " if self.title else ""
        return f"Document({label}{len(self.data)} chars)"
14 | 15 | Example 1: dspy.Code as output type in code generation: 16 | 17 | ```python 18 | import dspy 19 | 20 | dspy.configure(lm=dspy.LM("openai/gpt-4o-mini")) 21 | 22 | 23 | class CodeGeneration(dspy.Signature): 24 | '''Generate python code to answer the question.''' 25 | 26 | question: str = dspy.InputField(description="The question to answer") 27 | code: dspy.Code["java"] = dspy.OutputField(description="The code to execute") 28 | 29 | 30 | predict = dspy.Predict(CodeGeneration) 31 | 32 | result = predict(question="Given an array, find if any of the two numbers sum up to 10") 33 | print(result.code) 34 | ``` 35 | 36 | Example 2: dspy.Code as input type in code analysis: 37 | 38 | ```python 39 | import dspy 40 | import inspect 41 | 42 | dspy.configure(lm=dspy.LM("openai/gpt-4o-mini")) 43 | 44 | class CodeAnalysis(dspy.Signature): 45 | '''Analyze the time complexity of the function.''' 46 | 47 | code: dspy.Code["python"] = dspy.InputField(description="The function to analyze") 48 | result: str = dspy.OutputField(description="The time complexity of the function") 49 | 50 | 51 | predict = dspy.Predict(CodeAnalysis) 52 | 53 | 54 | def sleepsort(x): 55 | import time 56 | 57 | for i in x: 58 | time.sleep(i) 59 | print(i) 60 | 61 | result = predict(code=inspect.getsource(sleepsort)) 62 | print(result.result) 63 | ``` 64 | """ 65 | 66 | code: str 67 | 68 | language: ClassVar[str] = "python" 69 | 70 | def format(self): 71 | return f"{self.code}" 72 | 73 | @pydantic.model_serializer() 74 | def serialize_model(self): 75 | """Override to bypass the <<CUSTOM-TYPE-START-IDENTIFIER>> and <<CUSTOM-TYPE-END-IDENTIFIER>> tags.""" 76 | return self.format() 77 | 78 | @classmethod 79 | def description(cls) -> str: 80 | return ( 81 | "Code represented in a string, specified in the `code` field. If this is an output field, the code " 82 | "field should follow the markdown code block format, e.g. 
\n```python\n{code}\n``` or \n```cpp\n{code}\n```" 83 | f"\nProgramming language: {cls.language}" 84 | ) 85 | 86 | @pydantic.model_validator(mode="before") 87 | @classmethod 88 | def validate_input(cls, data: Any): 89 | if isinstance(data, cls): 90 | return data 91 | 92 | if isinstance(data, str): 93 | return {"code": _filter_code(data)} 94 | 95 | if isinstance(data, dict): 96 | if "code" not in data: 97 | raise ValueError("`code` field is required for `dspy.Code`") 98 | if not isinstance(data["code"], str): 99 | raise ValueError(f"`code` field must be a string, but received type: {type(data['code'])}") 100 | return {"code": _filter_code(data["code"])} 101 | 102 | raise ValueError(f"Received invalid value for `dspy.Code`: {data}") 103 | 104 | 105 | def _filter_code(code: str) -> str: 106 | """Extract code from markdown code blocks, stripping any language identifier.""" 107 | # Case 1: format like: 108 | # ```python 109 | # {code_block} 110 | # ``` 111 | regex_pattern = r"```(?:[^\n]*)\n(.*?)```" 112 | match = re.search(regex_pattern, code, re.DOTALL) 113 | if match: 114 | return match.group(1).strip() 115 | # Case 2: ```<code>``` (no language, single-line) 116 | regex_pattern_simple = r"```(.*?)```" 117 | match = re.search(regex_pattern_simple, code, re.DOTALL) 118 | if match: 119 | return match.group(1).strip() 120 | # Fallback case 121 | return code 122 | 123 | 124 | # Patch __class_getitem__ directly on the class to support dspy.Code["python"] syntax 125 | def _code_class_getitem(cls, language): 126 | code_with_language_cls = create_model(f"{cls.__name__}_{language}", __base__=cls) 127 | code_with_language_cls.language = language 128 | return code_with_language_cls 129 | 130 | 131 | Code.__class_getitem__ = classmethod(_code_class_getitem) 132 | ``` -------------------------------------------------------------------------------- /docs/docs/deep-dive/data-handling/examples.md: -------------------------------------------------------------------------------- 
```markdown 1 | --- 2 | sidebar_position: 1 3 | --- 4 | 5 | !!! warning "This page is outdated and may not be fully accurate in DSPy 2.5" 6 | 7 | # Examples in DSPy 8 | 9 | Working in DSPy involves training sets, development sets, and test sets. This is like traditional ML, but you usually need far fewer labels (or zero labels) to use DSPy effectively. 10 | 11 | The core data type for data in DSPy is `Example`. You will use **Examples** to represent items in your training set and test set. 12 | 13 | DSPy **Examples** are similar to Python `dict`s but have a few useful utilities. Your DSPy modules will return values of the type `Prediction`, which is a special sub-class of `Example`. 14 | 15 | ## Creating an `Example` 16 | 17 | When you use DSPy, you will do a lot of evaluation and optimization runs. Your individual datapoints will be of type `Example`: 18 | 19 | ```python 20 | qa_pair = dspy.Example(question="This is a question?", answer="This is an answer.") 21 | 22 | print(qa_pair) 23 | print(qa_pair.question) 24 | print(qa_pair.answer) 25 | ``` 26 | **Output:** 27 | ```text 28 | Example({'question': 'This is a question?', 'answer': 'This is an answer.'}) (input_keys=None) 29 | This is a question? 30 | This is an answer. 31 | ``` 32 | 33 | Examples can have any field keys and any value types, though usually values are strings. 34 | 35 | ```text 36 | object = Example(field1=value1, field2=value2, field3=value3, ...) 37 | ``` 38 | 39 | ## Specifying Input Keys 40 | 41 | In traditional ML, there are separate "inputs" and "labels". 42 | 43 | In DSPy, the `Example` objects have a `with_inputs()` method, which can mark specific fields as inputs. (The rest are just metadata or labels.) 44 | 45 | ```python 46 | # Single Input. 47 | print(qa_pair.with_inputs("question")) 48 | 49 | # Multiple Inputs; be careful about marking your labels as inputs unless you mean it. 
50 | print(qa_pair.with_inputs("question", "answer")) 51 | ``` 52 | 53 | This flexibility allows for customized tailoring of the `Example` object for different DSPy scenarios. 54 | 55 | When you call `with_inputs()`, you get a new copy of the example. The original object is kept unchanged. 56 | 57 | 58 | ## Element Access and Updation 59 | 60 | Values can be accessed using the `.`(dot) operator. You can access the value of key `name` in defined object `Example(name="John Doe", job="sleep")` through `object.name`. 61 | 62 | To access or exclude certain keys, use `inputs()` and `labels()` methods to return new Example objects containing only input or non-input keys, respectively. 63 | 64 | ```python 65 | article_summary = dspy.Example(article= "This is an article.", summary= "This is a summary.").with_inputs("article") 66 | 67 | input_key_only = article_summary.inputs() 68 | non_input_key_only = article_summary.labels() 69 | 70 | print("Example object with Input fields only:", input_key_only) 71 | print("Example object with Non-Input fields only:", non_input_key_only) 72 | ``` 73 | 74 | **Output** 75 | ``` 76 | Example object with Input fields only: Example({'article': 'This is an article.'}) (input_keys=None) 77 | Example object with Non-Input fields only: Example({'summary': 'This is a summary.'}) (input_keys=None) 78 | ``` 79 | 80 | To exclude keys, use `without()`: 81 | 82 | ```python 83 | article_summary = dspy.Example(context="This is an article.", question="This is a question?", answer="This is an answer.", rationale= "This is a rationale.").with_inputs("context", "question") 84 | 85 | print("Example object without answer & rationale keys:", article_summary.without("answer", "rationale")) 86 | ``` 87 | 88 | **Output** 89 | ``` 90 | Example object without answer & rationale keys: Example({'context': 'This is an article.', 'question': 'This is a question?'}) (input_keys=None) 91 | ``` 92 | 93 | Updating values is simply assigning a new value using the `.` 
operator. 94 | 95 | ```python 96 | article_summary.context = "new context" 97 | ``` 98 | 99 | ## Iterating over Example 100 | 101 | Iteration in the `Example` class also functions like a dictionary, supporting methods like `keys()`, `values()`, etc: 102 | 103 | ```python 104 | for k, v in article_summary.items(): 105 | print(f"{k} = {v}") 106 | ``` 107 | 108 | **Output** 109 | 110 | ```text 111 | context = This is an article. 112 | question = This is a question? 113 | answer = This is an answer. 114 | rationale = This is a rationale. 115 | ``` 116 | ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_nesting_2/schema.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "description": "This AI program is designed to process complex datasets with multiple nested input fields and produce structured output fields. It can handle cases where nested fields have the same name but different types, ensuring that the data is accurately processed and transformed. 
The program is particularly useful for applications that require detailed data analysis, integration of multiple data sources, and handling of heterogeneous data types.", 3 | "properties": { 4 | "customer": { 5 | "properties": { 6 | "customer_id": { 7 | "description": "Unique identifier for the customer", 8 | "type": "string" 9 | }, 10 | "customer_type": { 11 | "description": "Indicates if the customer is a premium member", 12 | "type": "boolean" 13 | }, 14 | "details": { 15 | "properties": { 16 | "age": { 17 | "description": "Customer's age", 18 | "type": "integer" 19 | }, 20 | "value": { 21 | "description": "Customer's value category", 22 | "type": "string" 23 | } 24 | }, 25 | "required": ["value", "age"], 26 | "type": "object" 27 | } 28 | }, 29 | "required": ["customer_id", "customer_type", "details"], 30 | "type": "object" 31 | }, 32 | "customer_summary": { 33 | "properties": { 34 | "customer_id": { 35 | "description": "Unique identifier for the customer", 36 | "type": "string" 37 | }, 38 | "customer_type": { 39 | "properties": { 40 | "category": { 41 | "description": "Customer's membership category", 42 | "type": "string" 43 | }, 44 | "is_premium": { 45 | "description": "Indicates if the customer is a premium member", 46 | "type": "boolean" 47 | } 48 | }, 49 | "required": ["is_premium", "category"], 50 | "type": "object" 51 | }, 52 | "value": { 53 | "description": "Customer's value category", 54 | "type": "string" 55 | } 56 | }, 57 | "required": ["customer_id", "customer_type", "value"], 58 | "type": "object" 59 | }, 60 | "transaction": { 61 | "properties": { 62 | "amount": { 63 | "description": "Transaction amount", 64 | "type": "number" 65 | }, 66 | "details": { 67 | "properties": { 68 | "timestamp": { 69 | "description": "Timestamp of the transaction", 70 | "format": "date-time", 71 | "type": "string" 72 | }, 73 | "value": { 74 | "description": "Monetary value of the transaction", 75 | "type": "number" 76 | } 77 | }, 78 | "required": ["value", "timestamp"], 
79 | "type": "object" 80 | }, 81 | "transaction_id": { 82 | "description": "Unique identifier for the transaction", 83 | "type": "string" 84 | } 85 | }, 86 | "required": ["transaction_id", "amount", "details"], 87 | "type": "object" 88 | }, 89 | "transaction_summary": { 90 | "properties": { 91 | "details": { 92 | "properties": { 93 | "timestamp": { 94 | "description": "Timestamp of the transaction", 95 | "format": "date-time", 96 | "type": "string" 97 | }, 98 | "value": { 99 | "description": "Monetary value of the transaction", 100 | "type": "number" 101 | } 102 | }, 103 | "required": ["value", "timestamp"], 104 | "type": "object" 105 | }, 106 | "total_amount": { 107 | "description": "Total transaction amount", 108 | "type": "number" 109 | }, 110 | "transaction_id": { 111 | "description": "Unique identifier for the transaction", 112 | "type": "string" 113 | } 114 | }, 115 | "required": ["transaction_id", "total_amount", "details"], 116 | "type": "object" 117 | } 118 | }, 119 | "required": [ 120 | "customer", 121 | "transaction", 122 | "customer_summary", 123 | "transaction_summary" 124 | ], 125 | "type": "object" 126 | } 127 | ``` -------------------------------------------------------------------------------- /tests/clients/test_embedding.py: -------------------------------------------------------------------------------- ```python 1 | from unittest.mock import patch 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | import dspy 7 | from dspy.clients.embedding import Embedder 8 | 9 | 10 | # Mock response format similar to litellm's embedding response. 
class MockEmbeddingResponse:
    """Test double shaped like litellm's embedding response.

    Each embedding is wrapped as ``{"embedding": vector}`` under ``data``; the usage
    counters are all zero and ``model``/``object`` hold fixed placeholder strings.
    """

    def __init__(self, embeddings):
        wrapped = []
        for vector in embeddings:
            wrapped.append({"embedding": vector})
        self.data = wrapped
        self.usage = dict.fromkeys(("prompt_tokens", "completion_tokens", "total_tokens"), 0)
        self.model = "mock_model"
        self.object = "list"
def test_callable_embedding(cache):
    """An Embedder wrapping a plain callable returns its output and caches repeat calls."""
    inputs = ["hello", "world", "test"]

    expected_embeddings = [
        [0.1, 0.2, 0.3],  # embedding for "hello"
        [0.4, 0.5, 0.6],  # embedding for "world"
        [0.7, 0.8, 0.9],  # embedding for "test"
    ]

    class EmbeddingFn:
        def __init__(self):
            self.call_count = 0

        def __call__(self, texts):
            # Simple callable that counts its invocations and always returns the fixed
            # `expected_embeddings`, regardless of `texts`.
            self.call_count += 1
            return expected_embeddings

    embedding_fn = EmbeddingFn()

    # Create embedding instance with callable
    embedding = Embedder(embedding_fn)
    result = embedding(inputs)

    assert embedding_fn.call_count == 1
    np.testing.assert_allclose(result, expected_embeddings)

    result = embedding(inputs)
    # The second call should be cached, so the underlying callable is not invoked again.
    assert embedding_fn.call_count == 1
    np.testing.assert_allclose(result, expected_embeddings)
class Dataset:
    """Base dataset with lazily built, seeded ``train``/``dev``/``test`` splits.

    The split properties read ``self._train``, ``self._dev`` and ``self._test``, which
    this class does not set itself (presumably subclasses populate them). Each split is
    shuffled with its own seed, truncated to its size, converted to ``Example`` objects
    and memoized until ``reset_seeds`` is called.

    Args:
        train_seed: Seed used to shuffle the train split.
        train_size: Number of train examples to keep (None keeps all).
        eval_seed: Seed used to shuffle both the dev and test splits.
        dev_size: Number of dev examples to keep (None keeps all).
        test_size: Number of test examples to keep (None keeps all).
        input_keys: Field names marked as inputs on every produced example.
    """

    def __init__(self, train_seed=0, train_size=None, eval_seed=0, dev_size=None, test_size=None, input_keys=None):
        self.train_size = train_size
        self.train_seed = train_seed
        self.dev_size = dev_size
        self.dev_seed = eval_seed
        self.test_size = test_size
        self.test_seed = eval_seed
        self.input_keys = input_keys or []

        self.do_shuffle = True

        self.name = self.__class__.__name__

    def reset_seeds(self, train_seed=None, train_size=None, eval_seed=None, dev_size=None, test_size=None):
        """Update seeds/sizes and drop the memoized splits so they are rebuilt on access.

        Only arguments that are not None are applied. Explicit zeros (e.g. ``train_seed=0``
        or ``test_size=0``) are honored rather than being treated as "not provided".
        """
        if train_size is not None:
            self.train_size = train_size
        if train_seed is not None:
            self.train_seed = train_seed
        if dev_size is not None:
            self.dev_size = dev_size
        if test_size is not None:
            self.test_size = test_size
        if eval_seed is not None:
            # dev and test share the evaluation seed, as in __init__.
            self.dev_seed = eval_seed
            self.test_seed = eval_seed

        for cached_split in ("_train_", "_dev_", "_test_"):
            if hasattr(self, cached_split):
                delattr(self, cached_split)

    @property
    def train(self):
        """The train split, built on first access and memoized in ``_train_``."""
        if not hasattr(self, "_train_"):
            self._train_ = self._shuffle_and_sample("train", self._train, self.train_size, self.train_seed)

        return self._train_

    @property
    def dev(self):
        """The dev split, built on first access and memoized in ``_dev_``."""
        if not hasattr(self, "_dev_"):
            self._dev_ = self._shuffle_and_sample("dev", self._dev, self.dev_size, self.dev_seed)

        return self._dev_

    @property
    def test(self):
        """The test split, built on first access and memoized in ``_test_``."""
        if not hasattr(self, "_test_"):
            self._test_ = self._shuffle_and_sample("test", self._test, self.test_size, self.test_seed)

        return self._test_

    def _shuffle_and_sample(self, split, data, size, seed=0):
        """Shuffle ``data`` with ``seed``, keep the first ``size`` items, wrap them as Examples.

        Args:
            split: Split name recorded on each example as ``dspy_split``.
            data: Iterable of mappings, each expanded into ``Example`` keyword arguments.
            size: Number of items to keep after shuffling; None keeps everything.
            seed: Seed for the shuffling RNG.

        Returns:
            A list of ``Example`` objects, each tagged with a fresh ``dspy_uuid``.
        """
        data = list(data)

        # Shuffle the data irrespective of the requested size.
        base_rng = random.Random(seed)

        if self.do_shuffle:
            base_rng.shuffle(data)

        data = data[:size]
        output = []

        for example in data:
            example_obj = Example(**example, dspy_uuid=str(uuid.uuid4()), dspy_split=split)
            if self.input_keys:
                example_obj = example_obj.with_inputs(*self.input_keys)
            output.append(example_obj)
            # TODO: NOTE: Ideally we use these uuids for dedup internally, for demos and internal train/val splits.
            # Now, some tasks (like convQA and Colors) have overlapping examples. Here, we should allow the user to give us
            # a uuid field that would respect this in some way. This means that we need a more refined concept that
            # uuid (each example is unique) and more like a group_uuid.

        return output

    @classmethod
    def prepare_by_seed(
        cls,
        train_seeds=None,
        train_size=16,
        dev_size=1000,
        divide_eval_per_seed=True,
        eval_seed=2023,
        **kwargs,
    ):
        """Build one train set per seed plus matching evaluation slices of the dev split.

        Args:
            train_seeds: Seeds to draw train sets with; defaults to ``[1, 2, 3, 4, 5]``.
            train_size: Required size of every train set (asserted).
            dev_size: Size of the dev split the evaluation slices are taken from.
            divide_eval_per_seed: When True, each seed gets its own consecutive slice of
                ``dev_size // len(train_seeds)`` examples; otherwise every seed gets the
                same first ``dev_size`` examples.
            eval_seed: Seed used for the dev split.
            **kwargs: Extra constructor arguments for ``cls``.

        Returns:
            A ``dotdict`` with ``train_sets`` and ``eval_sets`` lists, aligned by seed.
        """
        train_seeds = train_seeds or [1, 2, 3, 4, 5]
        data_args = dotdict(train_size=train_size, eval_seed=eval_seed, dev_size=dev_size, test_size=0, **kwargs)
        dataset = cls(**data_args)

        # The dev split is materialized once, before any reseeding.
        eval_set = dataset.dev
        eval_sets, train_sets = [], []

        examples_per_seed = dev_size // len(train_seeds) if divide_eval_per_seed else dev_size
        eval_offset = 0

        for train_seed in train_seeds:
            data_args.train_seed = train_seed
            dataset.reset_seeds(**data_args)

            eval_sets.append(eval_set[eval_offset : eval_offset + examples_per_seed])
            train_sets.append(dataset.train)

            assert len(eval_sets[-1]) == examples_per_seed, len(eval_sets[-1])
            assert len(train_sets[-1]) == train_size, len(train_sets[-1])

            if divide_eval_per_seed:
                eval_offset += examples_per_seed

        return dotdict(train_sets=train_sets, eval_sets=eval_sets)
fields_with_values: dict[FieldInfoWithName, Any]) -> str: 18 | output = [] 19 | for field, field_value in fields_with_values.items(): 20 | formatted = format_field_value(field_info=field.info, value=field_value) 21 | output.append(f"<{field.name}>\n{formatted}\n</{field.name}>") 22 | return "\n\n".join(output).strip() 23 | 24 | def format_field_structure(self, signature: type[Signature]) -> str: 25 | """ 26 | XMLAdapter requires input and output fields to be wrapped in XML tags like `<field_name>`. 27 | """ 28 | 29 | parts = [] 30 | parts.append("All interactions will be structured in the following way, with the appropriate values filled in.") 31 | 32 | def format_signature_fields_for_instructions(fields: dict[str, FieldInfo]): 33 | return self.format_field_with_value( 34 | fields_with_values={ 35 | FieldInfoWithName(name=field_name, info=field_info): translate_field_type(field_name, field_info) 36 | for field_name, field_info in fields.items() 37 | }, 38 | ) 39 | 40 | parts.append(format_signature_fields_for_instructions(signature.input_fields)) 41 | parts.append(format_signature_fields_for_instructions(signature.output_fields)) 42 | return "\n\n".join(parts).strip() 43 | 44 | def format_assistant_message_content( 45 | self, 46 | signature: type[Signature], 47 | outputs: dict[str, Any], 48 | missing_field_message=None, 49 | ) -> str: 50 | return self.format_field_with_value( 51 | { 52 | FieldInfoWithName(name=k, info=v): outputs.get(k, missing_field_message) 53 | for k, v in signature.output_fields.items() 54 | }, 55 | ) 56 | 57 | def user_message_output_requirements(self, signature: type[Signature]) -> str: 58 | message = "Respond with the corresponding output fields wrapped in XML tags " 59 | message += ", then ".join(f"`<{f}>`" for f in signature.output_fields) 60 | message += "." 
61 | return message 62 | 63 | def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]: 64 | fields = {} 65 | for match in self.field_pattern.finditer(completion): 66 | name = match.group("name") 67 | content = match.group("content").strip() 68 | if name in signature.output_fields and name not in fields: 69 | fields[name] = content 70 | # Cast values using base class parse_value helper 71 | for k, v in fields.items(): 72 | fields[k] = self._parse_field_value(signature.output_fields[k], v, completion, signature) 73 | if fields.keys() != signature.output_fields.keys(): 74 | from dspy.utils.exceptions import AdapterParseError 75 | 76 | raise AdapterParseError( 77 | adapter_name="XMLAdapter", 78 | signature=signature, 79 | lm_response=completion, 80 | parsed_result=fields, 81 | ) 82 | return fields 83 | 84 | def _parse_field_value(self, field_info, raw, completion, signature): 85 | from dspy.adapters.utils import parse_value 86 | 87 | try: 88 | return parse_value(raw, field_info.annotation) 89 | except Exception as e: 90 | from dspy.utils.exceptions import AdapterParseError 91 | 92 | raise AdapterParseError( 93 | adapter_name="XMLAdapter", 94 | signature=signature, 95 | lm_response=completion, 96 | message=f"Failed to parse field {field_info} with value {raw}: {e}", 97 | ) 98 | ``` -------------------------------------------------------------------------------- /docs/scripts/generate_api_summary.py: -------------------------------------------------------------------------------- ```python 1 | from pathlib import Path 2 | 3 | INDEX_NAME = { 4 | "models": "Models", 5 | "primitives": "Primitives", 6 | "signatures": "Signatures", 7 | "adapters": "Adapters", 8 | "modules": "Modules", 9 | "evaluation": "Evaluation", 10 | "optimizers": "Optimizers", 11 | "utils": "Utils", 12 | "tools": "Tools", 13 | "experimental": "Experimental", 14 | } 15 | 16 | 17 | def build_nav_structure(directory: Path, base_path: Path) -> dict: 18 | """Recursively build navigation 
structure for a directory.""" 19 | nav = {} 20 | 21 | # Get all items in current directory 22 | items = sorted(directory.iterdir()) 23 | 24 | for path in items: 25 | if path.suffix == ".md": 26 | name = path.stem 27 | nav[name] = str(path.relative_to(base_path)) 28 | elif path.is_dir() and path.name == "GEPA": 29 | nav["GEPA"] = { 30 | "2. GEPA Advanced": "api/optimizers/GEPA/GEPA_Advanced.md", 31 | "1. GEPA Overview": "api/optimizers/GEPA/overview.md", 32 | } 33 | 34 | return nav 35 | 36 | 37 | def format_nav_section(nav_dict, indent_level=2): 38 | """Convert dictionary to properly indented nav section""" 39 | lines = [] 40 | indent = " " * indent_level 41 | 42 | module_navs = [] 43 | file_navs = [] 44 | for key, value in sorted(nav_dict.items()): 45 | if isinstance(value, dict): 46 | # This is a section 47 | module_navs.append(f"{indent}- {key}:") 48 | module_navs.extend(format_nav_section(value, indent_level + 1)) 49 | else: 50 | # This is a file 51 | file_navs.append(f"{indent}- {key}: {value}") 52 | 53 | # Put submodules' nav items before file nav items. e.g., `dspy.evaluate` before `dspy.ChainOfThought` 54 | # in the nav section. 
55 | lines.extend(module_navs) 56 | lines.extend(file_navs) 57 | 58 | return lines 59 | 60 | 61 | def read_mkdocs_sections(filename: str = "mkdocs.yml"): 62 | """Read and parse the mkdocs.yml file into sections.""" 63 | with open(filename, "r") as f: 64 | lines = f.readlines() 65 | 66 | nav_start = -1 67 | theme_start = -1 68 | 69 | # Find section boundaries 70 | for i, line in enumerate(lines): 71 | if line.strip() == "nav:": 72 | nav_start = i 73 | elif line.strip() == "theme:": 74 | theme_start = i 75 | break 76 | 77 | # Split content into sections 78 | pre_nav = lines[: nav_start + 1] # Include the 'nav:' line 79 | nav_content = [] 80 | post_theme = lines[theme_start:] # Start from 'theme:' line 81 | 82 | # Extract nav content excluding API Reference 83 | i = nav_start + 1 84 | while i < theme_start: 85 | line = lines[i] 86 | if line.strip() == "- API Reference:": 87 | # Skip this line and all indented lines that follow 88 | i += 1 89 | while i < theme_start and (not lines[i].strip() or lines[i].startswith(" " * 8)): 90 | i += 1 91 | else: 92 | nav_content.append(line) 93 | i += 1 94 | 95 | return pre_nav, nav_content, post_theme 96 | 97 | 98 | def generate_api_nav(): 99 | """Generate the API navigation structure.""" 100 | api_nav = {} 101 | api_path = Path("docs/api") 102 | 103 | for dir_path in sorted(api_path.iterdir()): 104 | if dir_path.is_dir(): 105 | category = INDEX_NAME[dir_path.name] 106 | api_nav[category] = build_nav_structure(dir_path, Path("docs")) 107 | 108 | return api_nav 109 | 110 | 111 | def main(): 112 | """Main function to generate the API documentation summary.""" 113 | # Read existing mkdocs.yml sections 114 | pre_nav, nav_content, post_theme = read_mkdocs_sections() 115 | 116 | # Generate API navigation structure 117 | api_nav = generate_api_nav() 118 | 119 | # Create API section 120 | api_section = [" - API Reference:"] 121 | api_section.append(" - API Reference: api/index.md") 122 | api_section.extend(format_nav_section(api_nav)) 123 | 
api_section.append("") # Add empty line before theme section 124 | 125 | # Write back to mkdocs.yml 126 | with open("mkdocs.yml", "w") as f: 127 | # Write pre-nav content 128 | f.writelines(pre_nav) 129 | # Write nav content 130 | f.writelines(nav_content) 131 | # Add API section 132 | f.write("\n".join(api_section) + "\n") 133 | # Write post-theme content 134 | f.writelines(post_theme) 135 | 136 | 137 | if __name__ == "__main__": 138 | main() 139 | ``` -------------------------------------------------------------------------------- /dspy/primitives/example.py: -------------------------------------------------------------------------------- ```python 1 | class Example: 2 | def __init__(self, base=None, **kwargs): 3 | # Internal storage and other attributes 4 | self._store = {} 5 | self._demos = [] 6 | self._input_keys = None 7 | 8 | # Initialize from a base Example if provided 9 | if base and isinstance(base, type(self)): 10 | self._store = base._store.copy() 11 | 12 | # Initialize from a dict if provided 13 | elif base and isinstance(base, dict): 14 | self._store = base.copy() 15 | 16 | # Update with provided kwargs 17 | self._store.update(kwargs) 18 | 19 | def __getattr__(self, key): 20 | if key.startswith("__") and key.endswith("__"): 21 | raise AttributeError 22 | if key in self._store: 23 | return self._store[key] 24 | raise AttributeError(f"'{type(self).__name__}' object has no attribute '{key}'") 25 | 26 | def __setattr__(self, key, value): 27 | if key.startswith("_") or key in dir(self.__class__): 28 | super().__setattr__(key, value) 29 | else: 30 | self._store[key] = value 31 | 32 | def __getitem__(self, key): 33 | return self._store[key] 34 | 35 | def __setitem__(self, key, value): 36 | self._store[key] = value 37 | 38 | def __delitem__(self, key): 39 | del self._store[key] 40 | 41 | def __contains__(self, key): 42 | return key in self._store 43 | 44 | def __len__(self): 45 | return len([k for k in self._store if not k.startswith("dspy_")]) 46 | 47 | def 
__repr__(self): 48 | # return f"Example({self._store})" + f" (input_keys={self._input_keys}, demos={self._demos})" 49 | d = {k: v for k, v in self._store.items() if not k.startswith("dspy_")} 50 | return f"Example({d})" + f" (input_keys={self._input_keys})" 51 | 52 | def __str__(self): 53 | return self.__repr__() 54 | 55 | def __eq__(self, other): 56 | return isinstance(other, Example) and self._store == other._store 57 | 58 | def __hash__(self): 59 | return hash(tuple(self._store.items())) 60 | 61 | def keys(self, include_dspy=False): 62 | return [k for k in self._store.keys() if not k.startswith("dspy_") or include_dspy] 63 | 64 | def values(self, include_dspy=False): 65 | return [v for k, v in self._store.items() if not k.startswith("dspy_") or include_dspy] 66 | 67 | def items(self, include_dspy=False): 68 | return [(k, v) for k, v in self._store.items() if not k.startswith("dspy_") or include_dspy] 69 | 70 | def get(self, key, default=None): 71 | return self._store.get(key, default) 72 | 73 | def with_inputs(self, *keys): 74 | copied = self.copy() 75 | copied._input_keys = set(keys) 76 | return copied 77 | 78 | def inputs(self): 79 | if self._input_keys is None: 80 | raise ValueError("Inputs have not been set for this example. 
Use `example.with_inputs()` to set them.") 81 | 82 | # return items that are in input_keys 83 | d = {key: self._store[key] for key in self._store if key in self._input_keys} 84 | # return type(self)(d) 85 | new_instance = type(self)(base=d) 86 | new_instance._input_keys = self._input_keys # Preserve input_keys in new instance 87 | return new_instance 88 | 89 | def labels(self): 90 | # return items that are NOT in input_keys 91 | input_keys = self.inputs().keys() 92 | d = {key: self._store[key] for key in self._store if key not in input_keys} 93 | return type(self)(d) 94 | 95 | def __iter__(self): 96 | return iter(dict(self._store)) 97 | 98 | def copy(self, **kwargs): 99 | return type(self)(base=self, **kwargs) 100 | 101 | def without(self, *keys): 102 | copied = self.copy() 103 | for key in keys: 104 | del copied[key] 105 | return copied 106 | 107 | def toDict(self): # noqa: N802 108 | def convert_to_serializable(value): 109 | if hasattr(value, "toDict"): 110 | return value.toDict() 111 | elif isinstance(value, list): 112 | return [convert_to_serializable(item) for item in value] 113 | elif isinstance(value, dict): 114 | return {k: convert_to_serializable(v) for k, v in value.items()} 115 | else: 116 | return value 117 | 118 | serializable_store = {} 119 | for k, v in self._store.items(): 120 | serializable_store[k] = convert_to_serializable(v) 121 | 122 | return serializable_store 123 | ``` -------------------------------------------------------------------------------- /tests/signatures/test_custom_types.py: -------------------------------------------------------------------------------- ```python 1 | 2 | import pydantic 3 | import pytest 4 | 5 | import dspy 6 | from dspy import Signature 7 | 8 | 9 | def test_basic_custom_type_resolution(): 10 | """Test basic custom type resolution with both explicit and automatic mapping.""" 11 | class CustomType(pydantic.BaseModel): 12 | value: str 13 | 14 | # Custom types can be explicitly mapped 15 | explicit_sig = Signature( 
16 | "input: CustomType -> output: str", 17 | custom_types={"CustomType": CustomType} 18 | ) 19 | assert explicit_sig.input_fields["input"].annotation == CustomType 20 | 21 | # Custom types can also be auto-resolved from caller's scope 22 | auto_sig = Signature("input: CustomType -> output: str") 23 | assert auto_sig.input_fields["input"].annotation == CustomType 24 | 25 | 26 | def test_type_alias_for_nested_types(): 27 | """Test using type aliases for nested types.""" 28 | class Container: 29 | class NestedType(pydantic.BaseModel): 30 | value: str 31 | 32 | NestedType = Container.NestedType 33 | alias_sig = Signature("input: str -> output: NestedType") 34 | assert alias_sig.output_fields["output"].annotation == Container.NestedType 35 | 36 | class Container2: 37 | class Query(pydantic.BaseModel): 38 | text: str 39 | class Score(pydantic.BaseModel): 40 | score: float 41 | 42 | signature = dspy.Signature("query: Container2.Query -> score: Container2.Score") 43 | assert signature.output_fields["score"].annotation == Container2.Score 44 | 45 | 46 | class GlobalCustomType(pydantic.BaseModel): 47 | """A type defined at module level for testing module-level resolution.""" 48 | value: str 49 | notes: str = "" 50 | 51 | 52 | def test_module_level_type_resolution(): 53 | """Test resolution of types defined at module level.""" 54 | # Module-level types can be auto-resolved 55 | sig = Signature("name: str -> result: GlobalCustomType") 56 | assert sig.output_fields["result"].annotation == GlobalCustomType 57 | 58 | 59 | # Create module-level nested class for testing 60 | class OuterContainer: 61 | class InnerType(pydantic.BaseModel): 62 | name: str 63 | value: int 64 | 65 | 66 | def test_recommended_patterns(): 67 | """Test recommended patterns for working with custom types in signatures.""" 68 | 69 | # PATTERN 1: Local type with auto-resolution 70 | class LocalType(pydantic.BaseModel): 71 | value: str 72 | 73 | sig1 = Signature("input: str -> output: LocalType") 74 | assert 
sig1.output_fields["output"].annotation == LocalType 75 | 76 | # PATTERN 2: Module-level type with auto-resolution 77 | sig2 = Signature("input: str -> output: GlobalCustomType") 78 | assert sig2.output_fields["output"].annotation == GlobalCustomType 79 | 80 | # PATTERN 3: Nested type with dot notation 81 | sig3 = Signature("input: str -> output: OuterContainer.InnerType") 82 | assert sig3.output_fields["output"].annotation == OuterContainer.InnerType 83 | 84 | # PATTERN 4: Nested type using alias 85 | InnerTypeAlias = OuterContainer.InnerType 86 | sig4 = Signature("input: str -> output: InnerTypeAlias") 87 | assert sig4.output_fields["output"].annotation == InnerTypeAlias 88 | 89 | # PATTERN 5: Nested type with dot notation 90 | sig5 = Signature("input: str -> output: OuterContainer.InnerType") 91 | assert sig5.output_fields["output"].annotation == OuterContainer.InnerType 92 | 93 | def test_expected_failure(): 94 | # InnerType DNE when not OuterContainer.InnerTypes, so this type shouldnt be resolved 95 | with pytest.raises(ValueError): 96 | Signature("input: str -> output: InnerType") 97 | 98 | def test_module_type_resolution(): 99 | class TestModule(dspy.Module): 100 | def __init__(self): 101 | super().__init__() 102 | self.predict = dspy.Predict("input: str -> output: OuterContainer.InnerType") 103 | 104 | def predict(self, input: str) -> str: 105 | return input 106 | 107 | module = TestModule() 108 | sig = module.predict.signature 109 | assert sig.output_fields["output"].annotation == OuterContainer.InnerType 110 | 111 | def test_basic_custom_type_resolution(): 112 | class CustomType(pydantic.BaseModel): 113 | value: str 114 | 115 | sig = Signature("input: CustomType -> output: str", custom_types={"CustomType": CustomType}) 116 | assert sig.input_fields["input"].annotation == CustomType 117 | 118 | sig = Signature("input: CustomType -> output: str") 119 | assert sig.input_fields["input"].annotation == CustomType 120 | ``` 
-------------------------------------------------------------------------------- /docs/overrides/home.html: -------------------------------------------------------------------------------- ```html 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | <style> 5 | .md-main__inner .md-grid { 6 | padding: 0; 7 | margin: 0; 8 | } 9 | 10 | .content-container { 11 | max-width: 100%; 12 | margin: 0; 13 | padding: 0; 14 | } 15 | 16 | .hero { 17 | text-align: center; 18 | padding: 4rem 2rem; 19 | margin: 0; 20 | background-color: #f5f6f77a; 21 | color: white; 22 | } 23 | 24 | .hero-logo { 25 | max-width: 15rem; 26 | height: auto; 27 | margin: 0 auto; 28 | } 29 | 30 | .hero-subtitle { 31 | font-size: 1.2rem; 32 | margin: 1.5rem 0; 33 | color: #e2e8f0; 34 | } 35 | 36 | .cta-button { 37 | display: inline-block; 38 | padding: 0.75rem 1.5rem; 39 | background-color: transparent; 40 | color: black; 41 | text-decoration: none; 42 | border-radius: 0.375rem; 43 | font-weight: 600; 44 | border: 2px solid black; 45 | transition: all 0.3s ease; 46 | } 47 | 48 | .cta-button:hover { 49 | background-color: white; 50 | color: black; 51 | border: 2px solid white; 52 | } 53 | 54 | .features-section { 55 | padding: 4rem 2rem; 56 | } 57 | 58 | .features-title { 59 | text-align: center; 60 | font-size: 2rem; 61 | font-weight: 700; 62 | margin-bottom: 3rem; 63 | color: #1a202c; 64 | } 65 | 66 | .features-grid { 67 | display: grid; 68 | grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); 69 | gap: 3rem; 70 | max-width: 1200px; 71 | margin: 0 auto; 72 | } 73 | 74 | .feature-card { 75 | text-align: center; 76 | padding: 1.5rem; 77 | } 78 | 79 | .feature-image { 80 | width: 10rem; 81 | height: auto; 82 | margin: 0 auto 1.5rem; 83 | } 84 | 85 | .feature-title { 86 | font-size: 1.25rem; 87 | font-weight: 700; 88 | margin-bottom: 1rem; 89 | color: #2d3748; 90 | } 91 | 92 | .feature-description { 93 | color: #4a5568; 94 | line-height: 1.5; 95 | } 96 | 97 | @media (max-width: 768px) { 98 | 
.hero { 99 | padding: 3rem 1rem; 100 | } 101 | 102 | .hero-logo { 103 | max-width: 10rem; 104 | } 105 | 106 | .features-grid { 107 | grid-template-columns: 1fr; 108 | gap: 2rem; 109 | } 110 | 111 | .feature-card { 112 | padding: 1rem; 113 | } 114 | } 115 | </style> 116 | 117 | <div class="content-container"> 118 | <div class="hero"> 119 | <img src="{{ 'static/img/dspy_logo.png' | url }}" alt="DSPy Logo" class="hero-logo"> 120 | <p class="hero-subtitle">Programming—not prompting—Language Models</p> 121 | <a href="{{ 'quick-start/getting-started-1' | url }}" class="cta-button">Get Started with DSPy</a> 122 | </div> 123 | 124 | <div class="features-section"> 125 | <h2 class="features-title">The Way of DSPy</h2> 126 | <div class="features-grid"> 127 | <div class="feature-card"> 128 | <img src="{{ 'static/img/optimize.png' | url }}" alt="Systematic Optimization" class="feature-image"> 129 | <h3 class="feature-title">Systematic Optimization</h3> 130 | <p class="feature-description">Choose from a range of optimizers to enhance your program. 
Whether it's generating refined instructions, or fine-tuning weights, DSPy's optimizers are engineered to maximize efficiency and effectiveness.</p> 131 | </div> 132 | 133 | <div class="feature-card"> 134 | <img src="{{ 'static/img/modular.png' | url }}" alt="Modular Approach" class="feature-image"> 135 | <h3 class="feature-title">Modular Approach</h3> 136 | <p class="feature-description">With DSPy, you can build your system using predefined modules, replacing intricate prompting techniques with straightforward, effective solutions.</p> 137 | </div> 138 | 139 | <div class="feature-card"> 140 | <img src="{{ 'static/img/universal_compatibility.png' | url }}" alt="Cross-LM Compatibility" class="feature-image"> 141 | <h3 class="feature-title">Cross-LM Compatibility</h3> 142 | <p class="feature-description">Whether you're working with powerhouse models like GPT-3.5 or GPT-4, or local models such as T5-base or Llama2-13b, DSPy seamlessly integrates and enhances their performance in your system.</p> 143 | </div> 144 | </div> 145 | </div> 146 | </div> 147 | {% endblock %} ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/MIPROv2.md: -------------------------------------------------------------------------------- ```markdown 1 | # dspy.MIPROv2 2 | 3 | `MIPROv2` (<u>M</u>ultiprompt <u>I</u>nstruction <u>PR</u>oposal <u>O</u>ptimizer Version 2) is a prompt optimizer capable of optimizing both instructions and few-shot examples jointly. It does this by bootstrapping few-shot example candidates, proposing instructions grounded in different dynamics of the task, and finding an optimized combination of these options using Bayesian Optimization. It can be used for optimizing few-shot examples & instructions jointly, or just instructions for 0-shot optimization. 
4 | 5 | <!-- START_API_REF --> 6 | ::: dspy.MIPROv2 7 | handler: python 8 | options: 9 | members: 10 | - compile 11 | - get_params 12 | show_source: true 13 | show_root_heading: true 14 | heading_level: 2 15 | docstring_style: google 16 | show_root_full_path: true 17 | show_object_full_path: false 18 | separate_signature: false 19 | inherited_members: true 20 | ::: 21 | <!-- END_API_REF --> 22 | 23 | ## Example Usage 24 | 25 | The program below shows how to optimize a math program with MIPROv2. 26 | 27 | ```python 28 | import dspy 29 | from dspy.datasets.gsm8k import GSM8K, gsm8k_metric 30 | 31 | # Import the optimizer 32 | from dspy.teleprompt import MIPROv2 33 | 34 | # Initialize the LM 35 | lm = dspy.LM('openai/gpt-4o-mini', api_key='YOUR_OPENAI_API_KEY') 36 | dspy.configure(lm=lm) 37 | 38 | # Initialize optimizer 39 | teleprompter = MIPROv2( 40 | metric=gsm8k_metric, 41 | auto="medium", # Can choose between light, medium, and heavy optimization runs 42 | ) 43 | 44 | # Optimize program 45 | print(f"Optimizing program with MIPROv2...") 46 | gsm8k = GSM8K() 47 | optimized_program = teleprompter.compile( 48 | dspy.ChainOfThought("question -> answer"), 49 | trainset=gsm8k.train, 50 | ) 51 | 52 | # Save optimize program for future use 53 | optimized_program.save(f"optimized.json") 54 | ``` 55 | 56 | ## How `MIPROv2` works 57 | 58 | At a high level, `MIPROv2` works by creating both few-shot examples and new instructions for each predictor in your LM program, and then searching over these using Bayesian Optimization to find the best combination of these variables for your program. If you want a visual explanation check out this [twitter thread](https://x.com/michaelryan207/status/1804189184988713065). 59 | 60 | These steps are broken down in more detail below: 61 | 62 | 1) **Bootstrap Few-Shot Examples**: Randomly sample examples from your training set and run them through your LM program. 
If the output from the program is correct for this example, it is kept as a valid few-shot example candidate. Otherwise, we try another example until we've curated the specified number of few-shot example candidates. This step creates `num_candidates` sets of `max_bootstrapped_demos` bootstrapped examples and `max_labeled_demos` basic examples sampled from the training set. 63 | 64 | 2) **Propose Instruction Candidates**. The instruction proposer includes (1) a generated summary of properties of the training dataset, (2) a generated summary of your LM program's code and the specific predictor that an instruction is being generated for, (3) the previously bootstrapped few-shot examples to show reference inputs / outputs for a given predictor and (4) a randomly sampled tip for generation (e.g. "be creative", "be concise", etc.) to help explore the feature space of potential instructions. This context is provided to a `prompt_model` which writes high-quality instruction candidates. 65 | 66 | 3) **Find an Optimized Combination of Few-Shot Examples & Instructions**. Finally, we use Bayesian Optimization to choose which combinations of instructions and demonstrations work best for each predictor in our program. This works by running a series of `num_trials` trials, where a new set of prompts is evaluated over our validation set at each trial. The new set of prompts is only evaluated on a minibatch of size `minibatch_size` at each trial (when `minibatch`=`True`). The best averaging set of prompts is then evaluated on the full validation set every `minibatch_full_eval_steps`. At the end of the optimization process, the LM program with the set of prompts that performed best on the full validation set is returned. 67 | 68 | For those interested in more details, more information on `MIPROv2` along with a study on `MIPROv2` compared with other DSPy optimizers can be found in [this paper](https://arxiv.org/abs/2406.11695). 
69 | ``` -------------------------------------------------------------------------------- /tests/utils/test_mcp.py: -------------------------------------------------------------------------------- ```python 1 | import asyncio 2 | import importlib 3 | 4 | import pytest 5 | 6 | from dspy.utils.mcp import convert_mcp_tool 7 | 8 | if importlib.util.find_spec("mcp") is None: 9 | pytest.skip(reason="mcp is not installed", allow_module_level=True) 10 | 11 | 12 | @pytest.mark.asyncio 13 | @pytest.mark.extra 14 | async def test_convert_mcp_tool(): 15 | from mcp import ClientSession, StdioServerParameters 16 | from mcp.client.stdio import stdio_client 17 | server_params = StdioServerParameters( 18 | command="python", 19 | args=["tests/utils/resources/mcp_server.py"], 20 | env=None, 21 | ) 22 | async with stdio_client(server_params) as (read, write): 23 | async with ClientSession(read, write) as session: 24 | await asyncio.wait_for(session.initialize(), timeout=5) 25 | response = await session.list_tools() 26 | 27 | # Check add 28 | add_tool = convert_mcp_tool(session, response.tools[0]) 29 | assert add_tool.name == "add" 30 | assert add_tool.desc == "Add two numbers" 31 | assert add_tool.args == {"a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "integer"}} 32 | assert add_tool.arg_types == {"a": int, "b": int} 33 | assert add_tool.arg_desc == { 34 | "a": "No description provided. (Required)", 35 | "b": "No description provided. (Required)", 36 | } 37 | assert await add_tool.acall(a=1, b=2) == "3" 38 | 39 | # Check hello 40 | hello_tool = convert_mcp_tool(session, response.tools[1]) 41 | assert hello_tool.name == "hello" 42 | assert hello_tool.desc == "Greet people" 43 | assert hello_tool.args == {"names": {"title": "Names", "type": "array", "items": {"type": "string"}}} 44 | assert hello_tool.arg_types == {"names": list} 45 | assert hello_tool.arg_desc == {"names": "No description provided. 
(Required)"} 46 | assert await hello_tool.acall(names=["Bob", "Tom"]) == ["Hello, Bob!", "Hello, Tom!"] 47 | 48 | # Check error handling 49 | error_tool = convert_mcp_tool(session, response.tools[2]) 50 | assert error_tool.name == "wrong_tool" 51 | assert error_tool.desc == "This tool raises an error" 52 | with pytest.raises( 53 | RuntimeError, match="Failed to call a MCP tool: Error executing tool wrong_tool: error!" 54 | ): 55 | await error_tool.acall() 56 | 57 | # Check nested Pydantic arg 58 | nested_pydantic_tool = convert_mcp_tool(session, response.tools[3]) 59 | 60 | assert nested_pydantic_tool.name == "get_account_name" 61 | assert nested_pydantic_tool.desc == "This extracts the name from account" 62 | assert nested_pydantic_tool.args == { 63 | "account": { 64 | "title": "Account", 65 | "type": "object", 66 | "required": ["profile", "account_id"], 67 | "properties": { 68 | "profile": { 69 | "title": "Profile", 70 | "type": "object", 71 | "properties": { 72 | "name": {"title": "Name", "type": "string"}, 73 | "age": {"title": "Age", "type": "integer"}, 74 | }, 75 | "required": ["name", "age"], 76 | }, 77 | "account_id": {"title": "Account Id", "type": "string"}, 78 | }, 79 | } 80 | } 81 | account_in_json = { 82 | "profile": { 83 | "name": "Bob", 84 | "age": 20, 85 | }, 86 | "account_id": "123", 87 | } 88 | result = await nested_pydantic_tool.acall(account=account_in_json) 89 | assert result == "Bob" 90 | 91 | # Check no input parameter current_datetime tool 92 | current_datetime_tool = convert_mcp_tool(session, response.tools[4]) 93 | assert current_datetime_tool.name == "current_datetime" 94 | assert current_datetime_tool.desc == "Get the current datetime" 95 | assert current_datetime_tool.args == {} 96 | assert current_datetime_tool.arg_types == {} 97 | assert current_datetime_tool.arg_desc == {} 98 | assert await current_datetime_tool.acall() == "2025-07-23T09:10:10.0+00:00" 99 | ``` 
-------------------------------------------------------------------------------- /docs/docs/tutorials/async/index.md: -------------------------------------------------------------------------------- ```markdown 1 | # Async DSPy Programming 2 | 3 | DSPy provides native support for asynchronous programming, allowing you to build more efficient and 4 | scalable applications. This guide will walk you through how to leverage async capabilities in DSPy, 5 | covering both built-in modules and custom implementations. 6 | 7 | ## Why Use Async in DSPy? 8 | 9 | Asynchronous programming in DSPy offers several benefits: 10 | - Improved performance through concurrent operations 11 | - Better resource utilization 12 | - Reduced waiting time for I/O-bound operations 13 | - Enhanced scalability for handling multiple requests 14 | 15 | ## When Should I use Sync or Async? 16 | 17 | Choosing between synchronous and asynchronous programming in DSPy depends on your specific use case. 18 | Here's a guide to help you make the right choice: 19 | 20 | Use Synchronous Programming When: 21 | 22 | - You're exploring or prototyping new ideas 23 | - You're conducting research or experiments 24 | - You're building small to medium-sized applications 25 | - You need simpler, more straightforward code 26 | - You want easier debugging and error tracking 27 | 28 | Use Asynchronous Programming When: 29 | 30 | - You're building a high-throughput service (high QPS) 31 | - You're working with tools that only support async operations 32 | - You need to handle multiple concurrent requests efficiently 33 | - You're building a production service that requires high scalability 34 | 35 | ### Important Considerations 36 | 37 | While async programming offers performance benefits, it comes with some trade-offs: 38 | 39 | - More complex error handling and debugging 40 | - Potential for subtle, hard-to-track bugs 41 | - More complex code structure 42 | - Different code between ipython (Colab, Jupyter lab, Databricks 
notebooks, ...) and normal python runtime. 43 | 44 | We recommend starting with synchronous programming for most development scenarios and switching to async 45 | only when you have a clear need for its benefits. This approach allows you to focus on the core logic of 46 | your application before dealing with the additional complexity of async programming. 47 | 48 | ## Using Built-in Modules Asynchronously 49 | 50 | Most DSPy built-in modules support asynchronous operations through the `acall()` method. This method 51 | maintains the same interface as the synchronous `__call__` method but operates asynchronously. 52 | 53 | Here's a basic example using `dspy.Predict`: 54 | 55 | ```python 56 | import dspy 57 | import asyncio 58 | import os 59 | 60 | os.environ["OPENAI_API_KEY"] = "your_api_key" 61 | 62 | dspy.configure(lm=dspy.LM("openai/gpt-4o-mini")) 63 | predict = dspy.Predict("question->answer") 64 | 65 | async def main(): 66 | # Use acall() for async execution 67 | output = await predict.acall(question="why did a chicken cross the kitchen?") 68 | print(output) 69 | 70 | 71 | asyncio.run(main()) 72 | ``` 73 | 74 | ### Working with Async Tools 75 | 76 | DSPy's `Tool` class seamlessly integrates with async functions. When you provide an async 77 | function to `dspy.Tool`, you can execute it using `acall()`. This is particularly useful 78 | for I/O-bound operations or when working with external services. 
79 | 80 | ```python 81 | import asyncio 82 | import dspy 83 | import os 84 | 85 | os.environ["OPENAI_API_KEY"] = "your_api_key" 86 | 87 | async def foo(x): 88 | # Simulate an async operation 89 | await asyncio.sleep(0.1) 90 | print(f"I get: {x}") 91 | 92 | # Create a tool from the async function 93 | tool = dspy.Tool(foo) 94 | 95 | async def main(): 96 | # Execute the tool asynchronously 97 | await tool.acall(x=2) 98 | 99 | asyncio.run(main()) 100 | ``` 101 | 102 | Note: When using `dspy.ReAct` with tools, calling `acall()` on the ReAct instance will automatically 103 | execute all tools asynchronously using their `acall()` methods. 104 | 105 | ## Creating Custom Async DSPy Modules 106 | 107 | To create your own async DSPy module, implement the `aforward()` method instead of `forward()`. This method 108 | should contain your module's async logic. Here's an example of a custom module that chains two async operations: 109 | 110 | ```python 111 | import dspy 112 | import asyncio 113 | import os 114 | 115 | os.environ["OPENAI_API_KEY"] = "your_api_key" 116 | dspy.configure(lm=dspy.LM("openai/gpt-4o-mini")) 117 | 118 | class MyModule(dspy.Module): 119 | def __init__(self): 120 | self.predict1 = dspy.ChainOfThought("question->answer") 121 | self.predict2 = dspy.ChainOfThought("answer->simplified_answer") 122 | 123 | async def aforward(self, question, **kwargs): 124 | # Execute predictions sequentially but asynchronously 125 | answer = await self.predict1.acall(question=question) 126 | return await self.predict2.acall(answer=answer) 127 | 128 | 129 | async def main(): 130 | mod = MyModule() 131 | result = await mod.acall(question="Why did a chicken cross the kitchen?") 132 | print(result) 133 | 134 | 135 | asyncio.run(main()) 136 | ``` 137 | ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_many_types_1/schema.json: 
-------------------------------------------------------------------------------- ```json 1 | { 2 | "description": "The program is designed to process various data types including tuples, enums, datetime values, literals, objects, and nested objects containing these types. The program will accept inputs of these types, perform specified operations on them, and return the results. The operations could include validation, transformation, and extraction of information from these inputs.", 3 | "properties": { 4 | "datetimeField": { 5 | "desc": null, 6 | "format": "date-time", 7 | "prefix": "Datetime Field:", 8 | "type": "string" 9 | }, 10 | "enumField": { 11 | "enum": ["option1", "option2", "option3"], 12 | "type": "string" 13 | }, 14 | "literalField": { 15 | "const": "literalValue", 16 | "enum": ["literalValue"], 17 | "type": "string" 18 | }, 19 | "nestedObjectField": { 20 | "properties": { 21 | "datetimeField": { 22 | "format": "date-time", 23 | "type": "string" 24 | }, 25 | "enumField": { 26 | "enum": ["option1", "option2", "option3"], 27 | "type": "string" 28 | }, 29 | "literalField": { 30 | "const": "literalValue", 31 | "enum": ["literalValue"], 32 | "type": "string" 33 | }, 34 | "tupleField": { 35 | "items": { 36 | "anyOf": [ 37 | { 38 | "type": "string" 39 | }, 40 | { 41 | "type": "number" 42 | } 43 | ] 44 | }, 45 | "maxItems": 2, 46 | "minItems": 2, 47 | "type": "array" 48 | } 49 | }, 50 | "required": ["tupleField", "enumField", "datetimeField", "literalField"], 51 | "type": "object" 52 | }, 53 | "objectField": { 54 | "properties": { 55 | "subField1": { 56 | "type": "string" 57 | }, 58 | "subField2": { 59 | "type": "number" 60 | } 61 | }, 62 | "required": ["subField1", "subField2"], 63 | "type": "object" 64 | }, 65 | "processedDatetimeField": { 66 | "desc": null, 67 | "format": "date-time", 68 | "prefix": "Processed Datetime Field:", 69 | "type": "string" 70 | }, 71 | "processedEnumField": { 72 | "enum": ["option1", "option2", "option3"], 73 | "type": "string" 
74 | }, 75 | "processedLiteralField": { 76 | "const": "literalValue", 77 | "enum": ["literalValue"], 78 | "type": "string" 79 | }, 80 | "processedNestedObjectField": { 81 | "properties": { 82 | "additionalField": { 83 | "type": "boolean" 84 | }, 85 | "datetimeField": { 86 | "format": "date-time", 87 | "type": "string" 88 | }, 89 | "enumField": { 90 | "enum": ["option1", "option2", "option3"], 91 | "type": "string" 92 | }, 93 | "literalField": { 94 | "const": "literalValue", 95 | "enum": ["literalValue"], 96 | "type": "string" 97 | }, 98 | "tupleField": { 99 | "items": { 100 | "anyOf": [ 101 | { 102 | "type": "string" 103 | }, 104 | { 105 | "type": "number" 106 | } 107 | ] 108 | }, 109 | "maxItems": 2, 110 | "minItems": 2, 111 | "type": "array" 112 | } 113 | }, 114 | "required": [ 115 | "tupleField", 116 | "enumField", 117 | "datetimeField", 118 | "literalField", 119 | "additionalField" 120 | ], 121 | "type": "object" 122 | }, 123 | "processedObjectField": { 124 | "properties": { 125 | "additionalField": { 126 | "type": "boolean" 127 | }, 128 | "subField1": { 129 | "type": "string" 130 | }, 131 | "subField2": { 132 | "type": "number" 133 | } 134 | }, 135 | "required": ["subField1", "subField2", "additionalField"], 136 | "type": "object" 137 | }, 138 | "processedTupleField": { 139 | "desc": null, 140 | "items": { 141 | "anyOf": [ 142 | { 143 | "type": "string" 144 | }, 145 | { 146 | "type": "number" 147 | } 148 | ] 149 | }, 150 | "prefix": "Processed Tuple Field:", 151 | "type": "array" 152 | }, 153 | "tupleField": { 154 | "desc": null, 155 | "items": { 156 | "anyOf": [ 157 | { 158 | "type": "string" 159 | }, 160 | { 161 | "type": "number" 162 | } 163 | ] 164 | }, 165 | "prefix": "Tuple Field:", 166 | "type": "array" 167 | } 168 | }, 169 | "required": [ 170 | "tupleField", 171 | "enumField", 172 | "datetimeField", 173 | "literalField", 174 | "objectField", 175 | "nestedObjectField", 176 | "processedTupleField", 177 | "processedEnumField", 178 | 
"processedDatetimeField", 179 | "processedLiteralField", 180 | "processedObjectField", 181 | "processedNestedObjectField" 182 | ], 183 | "type": "object" 184 | } 185 | ``` -------------------------------------------------------------------------------- /.github/.internal_dspyai/internals/build-and-release.md: -------------------------------------------------------------------------------- ```markdown 1 | # Build & Release Workflow Implementation 2 | 3 | The [build_and_release](https://github.com/stanfordnlp/dspy/blob/main/.github/workflows/build_and_release.yml) workflow automates deployments of dspy-ai to pypi. For a guide to triggering a release using the workflow, refer to [release checklist](release-checklist.md). 4 | 5 | ## Overview 6 | 7 | At a high level, the workflow works as follows: 8 | 9 | 1. Maintainer of the repo pushes a tag following [semver](https://semver.org/) versioning for the new release. 10 | 2. This triggers the github action which extracts the tag (the version) 11 | 3. Builds and publishes a release on [test-pypi](https://test.pypi.org/project/dspy-ai-test/) 12 | 4. Uses the test-pypi release to run build_utils/tests/intro.py with the new release as an integration test. Note intro.py is a copy of the intro notebook. 13 | 5. Assuming the test runs successfully, it pushes a release to [pypi](https://pypi.org/project/dspy-ai/). If not, the user can delete the tag, make the fixes and then push the tag again. Versioning for multiple releases to test-pypi with the same tag version is taken care of by the workflow by appending a pre-release identifier, so the user only needs to consider the version for pypi. 14 | 6. 
(Currently manual) the user creates a release and includes release notes, as described in docs/docs/release-checklist.md 15 | 16 | ## Implementation Details 17 | 18 | The workflow executes a series of jobs in sequence: 19 | - extract-tag 20 | - build-and-publish-test-pypi 21 | - test-intro-script 22 | - build-and-publish-pypi 23 | 24 | #### extract-tag 25 | Extracts the tag pushed to the commit. This tag is expected to be the version of the new deployment. 26 | 27 | #### build-and-publish-test-pypi 28 | Builds and publishes the package to test-pypi. 29 | 1. Determines the version that should be deployed to test-pypi. There may be an existing deployment with the version specified by the tag in the case that a deployment failed and the maintainer made some changes and pushed the same tag again (which is the intended usage). The following logic is implemented in [test_version.py](https://github.com/stanfordnlp/dspy/blob/main/build_utils/test_version.py) 30 | 1. Load the releases on test-pypi 31 | 1. Check if there is a release matching our current tag 32 | 1. If not, create a release with the current tag 33 | 1. If it exists, load the latest published version (this will either be the version with the tag itself, or the tag + a pre-release version). In either case, increment the pre-release version. 34 | 1. Updates the version placeholder in [setup.py](https://github.com/stanfordnlp/dspy/blob/main/setup.py) to the version obtained in step 1. 35 | 1. Updates the version placeholder in [pyproject.toml](https://github.com/stanfordnlp/dspy/blob/main/pyproject.toml) to the version obtained in step 1. 36 | 1. Updates the package name placeholder in [setup.py](https://github.com/stanfordnlp/dspy/blob/main/setup.py) to `dspy-ai-test`* 37 | 1. Updates the package name placeholder in [pyproject.toml](https://github.com/stanfordnlp/dspy/blob/main/pyproject.toml) to `dspy-ai-test`* 38 | 1. Builds the binary wheel 39 | 1. Publishes the package to test-pypi.
40 | 41 | 42 | #### test-intro-script 43 | Runs the pytest containing the intro script as an integration test using the package published to test-pypi. This is a validation step before publishing to pypi. 44 | 1. Uses a loop to install the version just published to test-pypi as sometimes there is a race condition between the package becoming available for installation and this job executing. 45 | 2. Runs the test to ensure the package is working as expected. 46 | 3. If this fails, the workflow fails and the maintainer needs to make a fix and delete and then recreate the tag. 47 | 48 | #### build-and-publish-pypi 49 | Builds and publishes the package to pypi. 50 | 51 | 1. Updates the version placeholder in [setup.py](https://github.com/stanfordnlp/dspy/blob/main/setup.py) to the version obtained in step 1. 52 | 1. Updates the version placeholder in [pyproject.toml](https://github.com/stanfordnlp/dspy/blob/main/pyproject.toml) to the version obtained in step 1. 53 | 1. Updates the package name placeholder in [setup.py](https://github.com/stanfordnlp/dspy/blob/main/setup.py) to `dspy-ai`* 54 | 1. Updates the package name placeholder in [pyproject.toml](https://github.com/stanfordnlp/dspy/blob/main/pyproject.toml) to `dspy-ai`* 55 | 1. Builds the binary wheel 56 | 1. Publishes the package to pypi. 57 | 58 | 59 | \* The package name is updated by the workflow to allow the same files to be used to build both the pypi and test-pypi packages. ``` -------------------------------------------------------------------------------- /docs/docs/community/community-resources.md: -------------------------------------------------------------------------------- ```markdown 1 | # Resources 2 | 3 | This is the list of tutorials and blog posts on DSPy. If you would like to add your own tutorial, please make a PR. 
4 | 5 | 6 | ## A Few Blogs & Videos on using DSPy 7 | 8 | 9 | 10 | ### Blogs 11 | 12 | | **Name** | **Link** | 13 | |---|---| 14 | | **Why I bet on DSPy** | [Blog](https://blog.isaacbmiller.com/posts/dspy) | 15 | | **Not Your Average Prompt Engineering** | [Blog](https://jina.ai/news/dspy-not-your-average-prompt-engineering/) | 16 | | **Why I'm excited about DSPy** | [Blog](https://substack.stephen.so/p/why-im-excited-about-dspy) | 17 | | **Achieving GPT-4 Performance at Lower Cost** | [Link](https://gradient.ai/blog/achieving-gpt-4-level-performance-at-lower-cost-using-dspy) | 18 | | **Prompt engineering is a task best left to AI models** | [Link](https://www.theregister.com/2024/02/22/prompt_engineering_ai_models/) | 19 | | **What makes DSPy a valuable framework for developing complex language model pipelines?** | [Link](https://medium.com/@sujathamudadla1213/what-makes-dspy-a-valuable-framework-for-developing-complex-language-model-pipelines-edfa5b4bcf9b) | 20 | | **DSPy: A new framework to program your foundation models just by prompting** | [Link](https://www.linkedin.com/pulse/dspy-new-framework-program-your-foundation-models-just-prompting-lli4c/) | 21 | | **Intro to DSPy: Goodbye Prompting, Hello Programming** | [Link](https://medium.com/towards-data-science/intro-to-dspy-goodbye-prompting-hello-programming-4ca1c6ce3eb9) | 22 | | **DSPyGen: Revolutionizing AI** | [Link](https://www.linkedin.com/pulse/launch-alert-dspygen-20242252-revolutionizing-ai-sean-chatman--g9f1c/) | 23 | | **Building an AI Assistant with DSPy** | [Link](https://www.linkedin.com/pulse/building-ai-assistant-dspy-valliappa-lakshmanan-vgnsc/) | 24 | | **Building Self-improving Agents in Production with DSPy** | [Link](https://relevanceai.com/blog/building-self-improving-agentic-systems-in-production-with-dspy) | 25 | 26 | 27 | ### Videos 28 | | **Name** | **Link** | 29 | |---|---| 30 | | **DSPy Explained! 
(60K views)** | [Link](https://www.youtube.com/watch?v=41EfOY0Ldkc) | 31 | | **DSPy Intro from Sephora (25K views)** | [Link](https://www.youtube.com/watch?v=D2HurSldDkE) | 32 | | **Structured Outputs with DSPy** | [Link](https://www.youtube.com/watch?v=tVw3CwrN5-8) | 33 | | **DSPy and ColBERT - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=CDung1LnLbY) | 34 | | **SBTB23 DSPy** | [Link](https://www.youtube.com/watch?v=Dt3H2ninoeY) | 35 | | **Optimization with DSPy and LangChain** | [Link](https://www.youtube.com/watch?v=4EXOmWeqXRc) | 36 | | **Automated Prompt Engineering + Visualization** | [Link](https://www.youtube.com/watch?v=eAZ2LtJ6D5k) | 37 | | **Transforming LM Calls into Pipelines** | [Link](https://www.youtube.com/watch?v=NoaDWKHdkHg) | 38 | | **NeurIPS Hacker Cup: DSPy for Code Gen** | [Link](https://www.youtube.com/watch?v=gpe-rtJN8z8) | 39 | | **MIPRO and DSPy - Weaviate Podcast** | [Link](https://www.youtube.com/watch?v=skMH3DOV_UQ) | 40 | | **Getting Started with RAG in DSPy** | [Link](https://www.youtube.com/watch?v=CEuUG4Umfxs) | 41 | | **Adding Depth to DSPy Programs** | [Link](https://www.youtube.com/watch?v=0c7Ksd6BG88) | 42 | | **Programming Foundation Models with DSPy** | [Link](https://www.youtube.com/watch?v=Y94tw4eDHW0) | 43 | | **DSPy End-to-End: SF Meetup** | [Link](https://www.youtube.com/watch?v=Y81DoFmt-2U) | 44 | | **Monitoring & Tracing DSPy with Langtrace** | [Link](https://langtrace.ai/blog/announcing-dspy-support-in-langtrace) | 45 | | **Teaching chat models to solve chess puzzles using DSPy + Finetuning** | [Link](https://raw.sh/posts/chess_puzzles) | 46 | | **Build Self-Improving AI Agents with DSPy (No Code)** | [Link](https://www.youtube.com/watch?v=UY8OsMlV21Y) | 47 | | **DSPy 3.0 and DSPy at Databricks** | [Link](https://www.youtube.com/watch?v=grIuzesOwwU) | 48 | | **Context Engineering with DSPy** | [Link](https://www.youtube.com/watch?v=1I9PoXzvWcs) | 49 | 50 | ### Slides 51 | 52 | | **Name** | **Link** | 53 
| |---|---| 54 | | **Context Engineering with DSPy** | [Link](https://docs.google.com/presentation/d/1ydssF387l1LsJ14z41_HUqsJwU77tKZJNGnAWPsw-1I/edit?usp=sharing) | 55 | 56 | 57 | ### Podcasts 58 | 59 | Weaviate has a directory of 10 amazing notebooks and 6 podcasts! 60 | Huge shoutout to them for the massive support ❤️. See the [Weaviate DSPy directory](https://weaviate.io/developers/weaviate/more-resources/dspy). 61 | 62 | 63 | This list represents a curated selection of DSPy resources. We continuously add new content as it becomes available in the community. 64 | 65 | Credit: Some of these resources were originally compiled in the [Awesome DSPy](https://github.com/ganarajpr/awesome-dspy/tree/master) repo. 66 | 67 | ``` -------------------------------------------------------------------------------- /dspy/datasets/alfworld/alfworld.py: -------------------------------------------------------------------------------- ```python 1 | import os 2 | import queue 3 | import random 4 | 5 | 6 | def env_worker(inq, outq): 7 | """ 8 | Worker process: creates a single AlfredTWEnv instance, 9 | handles 'init' (with task idx) and 'step' (with action). 10 | """ 11 | 12 | try: 13 | import io 14 | from contextlib import redirect_stderr, redirect_stdout 15 | 16 | import alfworld.agents.environment as environment 17 | import yaml 18 | except ImportError: 19 | raise ImportError( 20 | "alfworld is not installed. " 21 | "Please install it via `pip install alfworld==0.3.5` then run `alfworld-download`." 
22 | ) 23 | 24 | buf = io.StringIO() 25 | base_dir = os.path.dirname(os.path.abspath(__file__)) 26 | config_path = os.path.join(base_dir, "base_config.yml") 27 | 28 | with open(config_path) as f: 29 | config = yaml.safe_load(f) 30 | 31 | with redirect_stdout(buf), redirect_stderr(buf): 32 | base_env = environment.AlfredTWEnv(config, train_eval="train") 33 | 34 | env = None 35 | while True: 36 | cmd, data = inq.get() 37 | if cmd == "init": 38 | env = base_env.init_env(batch_size=1) 39 | env.skip(data) 40 | task_def, info = env.reset() 41 | outq.put((task_def[0], info)) 42 | elif cmd == "step": 43 | obs, rew, done, info = env.step([data]) 44 | outq.put((obs, rew, done, info)) 45 | elif cmd == "close": 46 | outq.put("CLOSED") 47 | break 48 | else: 49 | outq.put("UNKNOWN_CMD") 50 | 51 | 52 | class EnvPool: 53 | """ 54 | Pool of processes, each with a unique env_worker. 55 | Acquire a worker using a context manager for safe usage: 56 | with pool.session() as sess: 57 | sess.init(5) # init with idx=5 58 | obs, rew, done, info = sess.step("go north") 59 | ... 60 | """ 61 | 62 | def __init__(self, size=2): 63 | self.size = size 64 | self.workers = [] 65 | self.available = queue.Queue() 66 | 67 | try: 68 | import multiprocess as mp 69 | except ImportError: 70 | raise ImportError("multiprocess is not installed. 
" "Please install it via `pip install multiprocess`.") 71 | 72 | # Must call set_start_method('spawn') here, before creating any processes 73 | try: 74 | mp.set_start_method("spawn", force=True) 75 | except RuntimeError: 76 | # If it's already set, ignore 77 | pass 78 | 79 | ctx = mp.get_context("spawn") 80 | for i in range(size): 81 | inq = ctx.Queue() 82 | outq = ctx.Queue() 83 | p = ctx.Process(target=env_worker, args=(inq, outq), daemon=True) 84 | p.start() 85 | self.workers.append((inq, outq, p)) 86 | self.available.put(i) 87 | 88 | def _acquire(self): 89 | wid = self.available.get() 90 | return wid, self.workers[wid][0], self.workers[wid][1] 91 | 92 | def _release(self, wid): 93 | self.available.put(wid) 94 | 95 | def close_all(self): 96 | """Close all processes in the pool.""" 97 | while not self.available.empty(): 98 | wid = self.available.get() 99 | inq, outq, proc = self.workers[wid] 100 | inq.put(("close", None)) 101 | outq.get() # Wait 'CLOSED' 102 | inq.close() 103 | outq.close() 104 | proc.join() 105 | 106 | def session(self): 107 | """Context manager that acquires/releases a single worker.""" 108 | return _EnvSession(self) 109 | 110 | 111 | class _EnvSession: 112 | """ 113 | A context manager that acquires a worker from the pool, 114 | provides .init(idx) and .step(action), then releases the worker. 
115 | """ 116 | 117 | def __init__(self, pool: EnvPool): 118 | self.pool = pool 119 | self.wid = None 120 | self.inq = None 121 | self.outq = None 122 | 123 | def __enter__(self): 124 | self.wid, self.inq, self.outq = self.pool._acquire() 125 | return self 126 | 127 | def __exit__(self, exc_type, exc_val, exc_tb): 128 | self.pool._release(self.wid) 129 | 130 | def init(self, idx): 131 | self.inq.put(("init", idx)) 132 | return self.outq.get() # (task_def, info) 133 | 134 | def step(self, action): 135 | self.inq.put(("step", action)) 136 | return self.outq.get() # (obs, rew, done, info) 137 | 138 | 139 | class AlfWorld: 140 | def __init__(self, max_threads=20): 141 | self.POOL = EnvPool(size=max_threads) 142 | 143 | import dspy 144 | 145 | dataset = [dspy.Example(idx=idx).with_inputs("idx") for idx in range(3500)] 146 | random.Random(0).shuffle(dataset) 147 | 148 | trainset, devset = dataset[:3000], dataset[-500:] 149 | assert len(trainset) + len(devset) <= len(dataset) 150 | 151 | self.trainset = trainset 152 | self.devset = devset 153 | 154 | def __del__(self): 155 | self.POOL.close_all() 156 | ``` -------------------------------------------------------------------------------- /tests/retrievers/test_embeddings.py: -------------------------------------------------------------------------------- ```python 1 | import os 2 | import tempfile 3 | from concurrent.futures import ThreadPoolExecutor 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | from dspy.retrievers.embeddings import Embeddings 9 | 10 | 11 | def dummy_corpus(): 12 | return [ 13 | "The cat sat on the mat.", 14 | "The dog barked at the mailman.", 15 | "Birds fly in the sky.", 16 | ] 17 | 18 | 19 | def dummy_embedder(texts): 20 | embeddings = [] 21 | for text in texts: 22 | if "cat" in text: 23 | embeddings.append(np.array([1, 0, 0], dtype=np.float32)) 24 | elif "dog" in text: 25 | embeddings.append(np.array([0, 1, 0], dtype=np.float32)) 26 | else: 27 | embeddings.append(np.array([0, 0, 1], 
dtype=np.float32)) 28 | return np.stack(embeddings) 29 | 30 | 31 | def test_embeddings_basic_search(): 32 | corpus = dummy_corpus() 33 | embedder = dummy_embedder 34 | 35 | retriever = Embeddings(corpus=corpus, embedder=embedder, k=1) 36 | 37 | query = "I saw a dog running." 38 | result = retriever(query) 39 | 40 | assert hasattr(result, "passages") 41 | assert hasattr(result, "indices") 42 | 43 | assert isinstance(result.passages, list) 44 | assert isinstance(result.indices, list) 45 | 46 | assert len(result.passages) == 1 47 | assert len(result.indices) == 1 48 | 49 | assert result.passages[0] == "The dog barked at the mailman." 50 | 51 | 52 | def test_embeddings_multithreaded_search(): 53 | corpus = dummy_corpus() 54 | embedder = dummy_embedder 55 | 56 | retriever = Embeddings(corpus=corpus, embedder=embedder, k=1) 57 | 58 | queries = [ 59 | ("A cat is sitting on the mat.", "The cat sat on the mat."), 60 | ("My dog is awesome!", "The dog barked at the mailman."), 61 | ("Birds flying high.", "Birds fly in the sky."), 62 | ] * 10 63 | 64 | def worker(query_text, expected_passage): 65 | result = retriever(query_text) 66 | assert result.passages[0] == expected_passage 67 | return result.passages[0] 68 | 69 | with ThreadPoolExecutor(max_workers=10) as executor: 70 | futures = [executor.submit(worker, q, expected) for q, expected in queries] 71 | # Results will be in original order 72 | results = [f.result() for f in futures] 73 | assert results[0] == "The cat sat on the mat." 74 | assert results[1] == "The dog barked at the mailman." 75 | assert results[2] == "Birds fly in the sky." 
76 | 77 | 78 | def test_embeddings_save_load(): 79 | corpus = dummy_corpus() 80 | embedder = dummy_embedder 81 | 82 | original_retriever = Embeddings(corpus=corpus, embedder=embedder, k=2, normalize=False, brute_force_threshold=1000) 83 | 84 | with tempfile.TemporaryDirectory() as temp_dir: 85 | save_path = os.path.join(temp_dir, "test_embeddings") 86 | 87 | # Save original 88 | original_retriever.save(save_path) 89 | 90 | # Verify files were created 91 | assert os.path.exists(os.path.join(save_path, "config.json")) 92 | assert os.path.exists(os.path.join(save_path, "corpus_embeddings.npy")) 93 | assert not os.path.exists(os.path.join(save_path, "faiss_index.bin")) # No FAISS for small corpus 94 | 95 | # Load into new instance 96 | new_retriever = Embeddings(corpus=["dummy"], embedder=embedder, k=1, normalize=True, brute_force_threshold=500) 97 | new_retriever.load(save_path, embedder) 98 | 99 | # Verify configuration was loaded correctly 100 | assert new_retriever.corpus == corpus 101 | assert new_retriever.k == 2 102 | assert new_retriever.normalize is False 103 | assert new_retriever.embedder == embedder 104 | assert new_retriever.index is None 105 | 106 | # Verify search results are preserved 107 | query = "cat sitting" 108 | original_result = original_retriever(query) 109 | loaded_result = new_retriever(query) 110 | assert loaded_result.passages == original_result.passages 111 | assert loaded_result.indices == original_result.indices 112 | 113 | 114 | def test_embeddings_from_saved(): 115 | corpus = dummy_corpus() 116 | embedder = dummy_embedder 117 | 118 | original_retriever = Embeddings(corpus=corpus, embedder=embedder, k=3, normalize=True, brute_force_threshold=1000) 119 | 120 | with tempfile.TemporaryDirectory() as temp_dir: 121 | save_path = os.path.join(temp_dir, "test_embeddings") 122 | 123 | original_retriever.save(save_path) 124 | loaded_retriever = Embeddings.from_saved(save_path, embedder) 125 | 126 | assert loaded_retriever.k == 
original_retriever.k 127 | assert loaded_retriever.normalize == original_retriever.normalize 128 | assert loaded_retriever.corpus == original_retriever.corpus 129 | 130 | 131 | 132 | def test_embeddings_load_nonexistent_path(): 133 | with pytest.raises((FileNotFoundError, OSError)): 134 | Embeddings.from_saved("/nonexistent/path", dummy_embedder) 135 | ``` -------------------------------------------------------------------------------- /docs/docs/tutorials/output_refinement/best-of-n-and-refine.md: -------------------------------------------------------------------------------- ```markdown 1 | # Output Refinement: BestOfN and Refine 2 | 3 | Both `BestOfN` and `Refine` are DSPy modules designed to improve the reliability and quality of predictions by making multiple `LM` calls with different rollout IDs to bypass caching. Both modules stop when they have reached `N` attempts or when the `reward_fn` returns a reward that meets the `threshold`. 4 | 5 | ## BestOfN 6 | 7 | `BestOfN` is a module that runs the provided module multiple times (up to `N`) with different rollout IDs. It returns either the first prediction that passes a specified threshold or the one with the highest reward if none meets the threshold. 8 | 9 | ### Basic Usage 10 | 11 | Let's say we wanted to have the best chance of getting a one-word answer from the model. We could use `BestOfN` to try multiple rollout IDs and return the best result.
12 | 13 | ```python 14 | import dspy 15 | 16 | def one_word_answer(args, pred: dspy.Prediction) -> float: 17 | return 1.0 if len(pred.answer.split()) == 1 else 0.0 18 | 19 | best_of_3 = dspy.BestOfN( 20 | module=dspy.ChainOfThought("question -> answer"), 21 | N=3, 22 | reward_fn=one_word_answer, 23 | threshold=1.0 24 | ) 25 | 26 | result = best_of_3(question="What is the capital of Belgium?") 27 | print(result.answer) # Brussels 28 | ``` 29 | 30 | ### Error Handling 31 | 32 | By default, if the module encounters an error during an attempt, it will continue trying until it reaches `N` attempts. You can adjust this behavior with the `fail_count` parameter: 33 | 34 | ```python 35 | best_of_3 = dspy.BestOfN( 36 | module=qa, 37 | N=3, 38 | reward_fn=one_word_answer, 39 | threshold=1.0, 40 | fail_count=1 41 | ) 42 | 43 | best_of_3(question="What is the capital of Belgium?") 44 | # raises an error after the first failure 45 | ``` 46 | 47 | ## Refine 48 | 49 | `Refine` extends the functionality of `BestOfN` by adding an automatic feedback loop. After each unsuccessful attempt (except the final one), it automatically generates detailed feedback about the module's performance and uses this feedback as hints for subsequent runs. 50 | 51 | ### Basic Usage 52 | 53 | ```python 54 | import dspy 55 | 56 | def one_word_answer(args, pred: dspy.Prediction) -> float: 57 | return 1.0 if len(pred.answer.split()) == 1 else 0.0 58 | 59 | refine = dspy.Refine( 60 | module=dspy.ChainOfThought("question -> answer"), 61 | N=3, 62 | reward_fn=one_word_answer, 63 | threshold=1.0 64 | ) 65 | 66 | result = refine(question="What is the capital of Belgium?") 67 | print(result.answer) # Brussels 68 | ``` 69 | 70 | ### Error Handling 71 | 72 | Like `BestOfN`, `Refine` will try up to `N` times by default, even if errors occur. 
You can control this with the `fail_count` parameter: 73 | 74 | ```python 75 | # Stop after the first error 76 | refine = dspy.Refine( 77 | module=qa, 78 | N=3, 79 | reward_fn=one_word_answer, 80 | threshold=1.0, 81 | fail_count=1 82 | ) 83 | ``` 84 | 85 | ## Comparison: BestOfN vs. Refine 86 | 87 | Both modules serve similar purposes but differ in their approach: 88 | 89 | - `BestOfN` simply tries different rollout IDs and selects the best resulting prediction as defined by the `reward_fn`. 90 | - `Refine` adds a feedback loop, using the LM to generate detailed feedback about the module's own performance using the previous prediction and the code in the `reward_fn`. This feedback is then used as hints for subsequent runs. 91 | 92 | ## Practical Examples 93 | 94 | ### Ensuring Factual Correctness 95 | 96 | ```python 97 | import dspy 98 | 99 | class FactualityJudge(dspy.Signature): 100 | """Determine if a statement is factually accurate.""" 101 | statement: str = dspy.InputField() 102 | is_factual: bool = dspy.OutputField() 103 | 104 | factuality_judge = dspy.ChainOfThought(FactualityJudge) 105 | 106 | def factuality_reward(args, pred: dspy.Prediction) -> float: 107 | statement = pred.answer 108 | result = factuality_judge(statement) 109 | return 1.0 if result.is_factual else 0.0 110 | 111 | refined_qa = dspy.Refine( 112 | module=dspy.ChainOfThought("question -> answer"), 113 | N=3, 114 | reward_fn=factuality_reward, 115 | threshold=1.0 116 | ) 117 | 118 | result = refined_qa(question="Tell me about Belgium's capital city.") 119 | print(result.answer) 120 | ``` 121 | 122 | ### Summarization - Controlling Response Length 123 | 124 | ```python 125 | import dspy 126 | 127 | def ideal_length_reward(args, pred: dspy.Prediction) -> float: 128 | """ 129 | Reward the summary for being close to 75 words with a tapering off for longer summaries.
130 | """ 131 | word_count = len(pred.summary.split()) 132 | distance = abs(word_count - 75) 133 | return max(0.0, 1.0 - (distance / 125)) 134 | 135 | optimized_summarizer = dspy.BestOfN( 136 | module=dspy.ChainOfThought("text -> summary"), 137 | N=50, 138 | reward_fn=ideal_length_reward, 139 | threshold=0.9 140 | ) 141 | 142 | result = optimized_summarizer( 143 | text="[Long text to summarize...]" 144 | ) 145 | print(result.summary) 146 | ``` 147 | 148 | ## Migration from `dspy.Suggest` and `dspy.Assert` 149 | 150 | `BestOfN` and `Refine` are the replacements for `dspy.Suggest` and `dspy.Assert` as of DSPy 2.6. 151 | ``` -------------------------------------------------------------------------------- /tests/predict/test_program_of_thought.py: -------------------------------------------------------------------------------- ```python 1 | import shutil 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | import dspy 7 | from dspy import ProgramOfThought, Signature 8 | from dspy.utils import DummyLM 9 | 10 | # This test suite requires deno to be installed. 
Please install deno following https://docs.deno.com/runtime/getting_started/installation/ 11 | is_deno_available = shutil.which("deno") is not None 12 | 13 | 14 | class BasicQA(Signature): 15 | question = dspy.InputField() 16 | answer = dspy.OutputField(desc="often between 1 and 5 words") 17 | 18 | 19 | @pytest.mark.skipif(not is_deno_available, reason="Deno is not installed or not in PATH") 20 | def test_pot_code_generation(): 21 | lm = DummyLM( 22 | [ 23 | { 24 | "reasoning": "Reason_A", 25 | "generated_code": "```python\nresult = 1+1\nfinal_answer({'answer': result})\n```", 26 | }, 27 | {"reasoning": "Reason_B", "answer": "2"}, 28 | ] 29 | ) 30 | dspy.settings.configure(lm=lm) 31 | pot = ProgramOfThought(BasicQA) 32 | res = pot(question="What is 1+1?") 33 | assert res.answer == "2" 34 | assert pot.interpreter.deno_process is None 35 | 36 | 37 | # This test ensures the old finetuned saved models still work 38 | @pytest.mark.skipif(not is_deno_available, reason="Deno is not installed or not in PATH") 39 | def test_old_style_pot(): 40 | lm = DummyLM( 41 | [ 42 | {"reasoning": "Reason_A", "generated_code": "```python\nresult = 1+1\n```"}, 43 | {"reasoning": "Reason_B", "answer": "2"}, 44 | ] 45 | ) 46 | dspy.settings.configure(lm=lm) 47 | pot = ProgramOfThought(BasicQA) 48 | res = pot(question="What is 1+1?") 49 | assert res.answer == "2" 50 | assert pot.interpreter.deno_process is None 51 | 52 | 53 | class ExtremumFinder(Signature): 54 | input_list = dspy.InputField() 55 | maximum = dspy.OutputField(desc="The maximum of the given numbers") 56 | minimum = dspy.OutputField(desc="The minimum of the given numbers") 57 | 58 | 59 | @pytest.mark.skipif(not is_deno_available, reason="Deno is not installed or not in PATH") 60 | def test_pot_support_multiple_fields(): 61 | lm = DummyLM( 62 | [ 63 | { 64 | "reasoning": "Reason_A", 65 | "generated_code": "```python\nmaximum = 6\nminimum = 2\nfinal_answer({'maximum': maximum, 'minimum': minimum})\n```", 66 | }, 67 | 
{"reasoning": "Reason_B", "maximum": "6", "minimum": "2"}, 68 | ] 69 | ) 70 | dspy.settings.configure(lm=lm) 71 | pot = ProgramOfThought(ExtremumFinder) 72 | res = pot(input_list="2, 3, 5, 6") 73 | assert res.maximum == "6" 74 | assert res.minimum == "2" 75 | assert pot.interpreter.deno_process is None 76 | 77 | 78 | @pytest.mark.skipif(not is_deno_available, reason="Deno is not installed or not in PATH") 79 | def test_pot_code_generation_with_one_error(): 80 | lm = DummyLM( 81 | [ 82 | { 83 | "reasoning": "Reason_A", 84 | "generated_code": "```python\nresult = 1+0/0\nfinal_answer({'answer': result})\n```", 85 | }, 86 | { 87 | "reasoning": "Reason_B", 88 | "generated_code": "```python\nresult = 1+1\nfinal_answer({'answer': result})\n```", 89 | }, 90 | {"reasoning": "Reason_C", "answer": "2"}, 91 | ] 92 | ) 93 | dspy.settings.configure(lm=lm) 94 | pot = ProgramOfThought(BasicQA) 95 | res = pot(question="What is 1+1?") 96 | assert res.answer == "2" 97 | assert pot.interpreter.deno_process is None 98 | 99 | 100 | @pytest.mark.skipif(not is_deno_available, reason="Deno is not installed or not in PATH") 101 | def test_pot_code_generation_persistent_errors(): 102 | max_iters = 3 103 | lm = DummyLM( 104 | [ 105 | { 106 | "reasoning": "Reason_A", 107 | "generated_code": "```python\nresult = 1+0/0\nfinal_answer({'answer': result})\n```", 108 | }, 109 | ] 110 | * max_iters 111 | ) 112 | dspy.settings.configure(lm=lm) 113 | 114 | pot = ProgramOfThought(BasicQA, max_iters=max_iters) 115 | with pytest.raises(RuntimeError, match="Max hops reached. 
Failed to run ProgramOfThought: ZeroDivisionError:"): 116 | pot(question="What is 1+1?") 117 | assert pot.interpreter.deno_process is None 118 | 119 | 120 | def test_pot_code_parse_error(): 121 | max_iters = 3 122 | lm = DummyLM( 123 | [ 124 | {"reasoning": "Reason_A", "generated_code": "```python\ninvalid=python=code\n```"}, 125 | ] 126 | * max_iters 127 | ) 128 | dspy.settings.configure(lm=lm) 129 | pot = ProgramOfThought(BasicQA, max_iters=max_iters) 130 | with ( 131 | patch("dspy.predict.program_of_thought.ProgramOfThought._execute_code") as mock_execute_code, 132 | pytest.raises( 133 | RuntimeError, match="Max hops reached. Failed to run ProgramOfThought: Error: Code format is not correct." 134 | ), 135 | ): 136 | pot(question="What is 1+1?") 137 | mock_execute_code.assert_not_called() 138 | ``` -------------------------------------------------------------------------------- /tests/examples/test_baleen.py: -------------------------------------------------------------------------------- ```python 1 | import dspy 2 | import dspy.evaluate 3 | from dspy.datasets import HotPotQA 4 | from dspy.dsp.utils import deduplicate 5 | from dspy.evaluate.evaluate import Evaluate 6 | from dspy.teleprompt.bootstrap import BootstrapFewShot 7 | 8 | 9 | class GenerateAnswer(dspy.Signature): 10 | """Answer questions with short factoid answers.""" 11 | 12 | context = dspy.InputField(desc="may contain relevant facts") 13 | question = dspy.InputField() 14 | answer = dspy.OutputField(desc="often between 1 and 5 words") 15 | 16 | 17 | class GenerateSearchQuery(dspy.Signature): 18 | """Write a simple search query that will help answer a complex question.""" 19 | 20 | context = dspy.InputField(desc="may contain relevant facts") 21 | question = dspy.InputField() 22 | query = dspy.OutputField() 23 | 24 | 25 | class SimplifiedBaleen(dspy.Module): 26 | def __init__(self, passages_per_hop=3, max_hops=2): 27 | super().__init__() 28 | 29 | self.generate_query = 
[dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)] 30 | self.retrieve = dspy.Retrieve(k=passages_per_hop) 31 | self.generate_answer = dspy.ChainOfThought(GenerateAnswer) 32 | self.max_hops = max_hops 33 | 34 | def forward(self, question): 35 | context = [] 36 | 37 | for hop in range(self.max_hops): 38 | query = self.generate_query[hop](context=context, question=question).query 39 | passages = self.retrieve(query).passages 40 | context = deduplicate(context + passages) 41 | 42 | pred = self.generate_answer(context=context, question=question) 43 | return dspy.Prediction(context=context, answer=pred.answer) 44 | 45 | 46 | def load_hotpotqa(): 47 | # Load the dataset. 48 | dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0) 49 | # Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata. 50 | trainset = [x.with_inputs("question") for x in dataset.train] 51 | devset = [x.with_inputs("question") for x in dataset.dev] 52 | return trainset, devset 53 | 54 | 55 | # @pytest.mark.slow_test 56 | # TODO: Find a way to make this test run without openai 57 | def _test_baleen(): 58 | lm = dspy.OpenAI(model="gpt-3.5-turbo") 59 | rm = dspy.ColBERTv2(url="http://20.102.90.50:2017/wiki17_abstracts") 60 | dspy.settings.configure(lm=lm, rm=rm) 61 | 62 | # Ask any question you like to this simple RAG program. 63 | my_question = "How many storeys are in the castle that David Gregory inherited?" 64 | 65 | # Get the prediction. This contains `pred.context` and `pred.answer`. 
66 | uncompiled_baleen = SimplifiedBaleen() # uncompiled (i.e., zero-shot) program 67 | pred = uncompiled_baleen(my_question) 68 | 69 | assert pred.answer == "five" 70 | 71 | 72 | def validate_context_and_answer_and_hops(example, pred, trace=None): 73 | if not dspy.evaluate.answer_exact_match(example, pred): 74 | return False 75 | if not dspy.evaluate.answer_passage_match(example, pred): 76 | return False 77 | 78 | hops = [example.question] + [outputs.query for *_, outputs in trace if "query" in outputs] 79 | 80 | if max([len(h) for h in hops]) > 100: 81 | return False 82 | if any(dspy.evaluate.answer_exact_match_str(hops[idx], hops[:idx], frac=0.8) for idx in range(2, len(hops))): 83 | return False 84 | 85 | return True 86 | 87 | 88 | def gold_passages_retrieved(example, pred, trace=None): 89 | gold_titles = set(map(dspy.evaluate.normalize_text, example["gold_titles"])) 90 | found_titles = set(map(dspy.evaluate.normalize_text, [c.split(" | ")[0] for c in pred.context])) 91 | 92 | return gold_titles.issubset(found_titles) 93 | 94 | 95 | # @pytest.mark.slow_test 96 | # TODO: Find a way to make this test run without the slow hotpotqa dataset 97 | def _test_compiled_baleen(): 98 | trainset, devset = load_hotpotqa() 99 | lm = dspy.OpenAI(model="gpt-3.5-turbo") 100 | rm = dspy.ColBERTv2(url="http://20.102.90.50:2017/wiki17_abstracts") 101 | dspy.settings.configure(lm=lm, rm=rm) 102 | 103 | uncompiled_baleen = SimplifiedBaleen() # uncompiled (i.e., zero-shot) program 104 | 105 | teleprompter = BootstrapFewShot(metric=validate_context_and_answer_and_hops) 106 | compiled_baleen = teleprompter.compile( 107 | SimplifiedBaleen(), 108 | teacher=SimplifiedBaleen(passages_per_hop=2), 109 | trainset=trainset, 110 | ) 111 | 112 | evaluate_on_hotpotqa = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5) 113 | uncompiled_baleen_retrieval_score = evaluate_on_hotpotqa( 114 | uncompiled_baleen, metric=gold_passages_retrieved, display=False 115 | ) 116 | # 
assert uncompiled_baleen_retrieval_score / 100 == 18 / 50 117 | 118 | compiled_baleen_retrieval_score = evaluate_on_hotpotqa(compiled_baleen, metric=gold_passages_retrieved) 119 | # assert compiled_baleen_retrieval_score / 100 == 27 / 50 120 | assert uncompiled_baleen_retrieval_score < compiled_baleen_retrieval_score 121 | ``` -------------------------------------------------------------------------------- /tests/utils/test_saving.py: -------------------------------------------------------------------------------- ```python 1 | import logging 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | import dspy 7 | from dspy.utils import DummyLM 8 | 9 | 10 | def test_save_predict(tmp_path): 11 | predict = dspy.Predict("question->answer") 12 | predict.save(tmp_path, save_program=True) 13 | 14 | assert (tmp_path / "metadata.json").exists() 15 | assert (tmp_path / "program.pkl").exists() 16 | 17 | loaded_predict = dspy.load(tmp_path) 18 | assert isinstance(loaded_predict, dspy.Predict) 19 | 20 | assert predict.signature == loaded_predict.signature 21 | 22 | 23 | def test_save_custom_model(tmp_path): 24 | class CustomModel(dspy.Module): 25 | def __init__(self): 26 | self.cot1 = dspy.ChainOfThought("question->refined_question") 27 | self.cot2 = dspy.ChainOfThought("refined_question->answer") 28 | 29 | model = CustomModel() 30 | model.save(tmp_path, save_program=True) 31 | 32 | loaded_model = dspy.load(tmp_path) 33 | assert isinstance(loaded_model, CustomModel) 34 | 35 | assert len(model.predictors()) == len(loaded_model.predictors()) 36 | for predictor, loaded_predictor in zip(model.predictors(), loaded_model.predictors(), strict=False): 37 | assert predictor.signature == loaded_predictor.signature 38 | 39 | 40 | def test_save_model_with_custom_signature(tmp_path): 41 | import datetime 42 | 43 | class MySignature(dspy.Signature): 44 | """Just a custom signature.""" 45 | 46 | current_date: datetime.date = dspy.InputField() 47 | target_date: datetime.date = 
dspy.InputField() 48 | date_diff: int = dspy.OutputField(desc="The difference in days between the current_date and the target_date") 49 | 50 | predict = dspy.Predict(MySignature) 51 | predict.signature = predict.signature.with_instructions("You are a helpful assistant.") 52 | predict.save(tmp_path, save_program=True) 53 | 54 | loaded_predict = dspy.load(tmp_path) 55 | assert isinstance(loaded_predict, dspy.Predict) 56 | 57 | assert predict.signature == loaded_predict.signature 58 | 59 | 60 | @pytest.mark.extra 61 | def test_save_compiled_model(tmp_path): 62 | predict = dspy.Predict("question->answer") 63 | dspy.settings.configure(lm=DummyLM([{"answer": "blue"}, {"answer": "white"}] * 10)) 64 | 65 | trainset = [ 66 | {"question": "What is the color of the sky?", "answer": "blue"}, 67 | {"question": "What is the color of the ocean?", "answer": "blue"}, 68 | {"question": "What is the color of the milk?", "answer": "white"}, 69 | {"question": "What is the color of the coffee?", "answer": "black"}, 70 | ] 71 | trainset = [dspy.Example(**example).with_inputs("question") for example in trainset] 72 | 73 | def dummy_metric(example, pred, trace=None): 74 | return True 75 | 76 | optimizer = dspy.BootstrapFewShot(max_bootstrapped_demos=4, max_labeled_demos=4, max_rounds=5, metric=dummy_metric) 77 | compiled_predict = optimizer.compile(predict, trainset=trainset) 78 | compiled_predict.save(tmp_path, save_program=True) 79 | 80 | loaded_predict = dspy.load(tmp_path) 81 | assert compiled_predict.demos == loaded_predict.demos 82 | assert compiled_predict.signature == loaded_predict.signature 83 | 84 | 85 | def test_load_with_version_mismatch(tmp_path): 86 | from dspy.utils.saving import logger 87 | 88 | # Mock versions during save 89 | save_versions = {"python": "3.9", "dspy": "2.4.0", "cloudpickle": "2.0"} 90 | 91 | # Mock versions during load 92 | load_versions = {"python": "3.10", "dspy": "2.5.0", "cloudpickle": "2.1"} 93 | 94 | predict = dspy.Predict("question->answer") 95 | 
96 | # Create a custom handler to capture log messages 97 | class ListHandler(logging.Handler): 98 | def __init__(self): 99 | super().__init__() 100 | self.messages = [] 101 | 102 | def emit(self, record): 103 | self.messages.append(record.getMessage()) 104 | 105 | # Add handler and set level 106 | handler = ListHandler() 107 | original_level = logger.level 108 | logger.addHandler(handler) 109 | logger.setLevel(logging.WARNING) 110 | 111 | try: 112 | # Mock version during save 113 | with patch("dspy.primitives.base_module.get_dependency_versions", return_value=save_versions): 114 | predict.save(tmp_path, save_program=True) 115 | 116 | # Mock version during load 117 | with patch("dspy.utils.saving.get_dependency_versions", return_value=load_versions): 118 | loaded_predict = dspy.load(tmp_path) 119 | 120 | # Assert warnings were logged, and one warning for each mismatched dependency. 121 | assert len(handler.messages) == 3 122 | 123 | for msg in handler.messages: 124 | assert "There is a mismatch of" in msg 125 | 126 | # Verify the model still loads correctly despite version mismatches 127 | assert isinstance(loaded_predict, dspy.Predict) 128 | assert predict.signature == loaded_predict.signature 129 | 130 | finally: 131 | # Clean up: restore original level and remove handler 132 | logger.setLevel(original_level) 133 | logger.removeHandler(handler) 134 | ``` -------------------------------------------------------------------------------- /docs/docs/learn/programming/mcp.md: -------------------------------------------------------------------------------- ```markdown 1 | --- 2 | sidebar_position: 3 3 | --- 4 | 5 | # Model Context Protocol (MCP) 6 | 7 | The [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) is an open protocol that standardizes how applications provide context to language models. DSPy supports MCP, allowing you to use tools from any MCP server with DSPy agents. 
8 | 9 | ## Installation 10 | 11 | Install DSPy with MCP support: 12 | 13 | ```bash 14 | pip install -U "dspy[mcp]" 15 | ``` 16 | 17 | ## Overview 18 | 19 | MCP enables you to: 20 | 21 | - **Use standardized tools** - Connect to any MCP-compatible server. 22 | - **Share tools across stacks** - Use the same tools across different frameworks. 23 | - **Simplify integration** - Convert MCP tools to DSPy tools with one line. 24 | 25 | DSPy does not handle MCP server connections directly. You can use the client interfaces of the `mcp` library to establish the connection and pass `mcp.ClientSession` to `dspy.Tool.from_mcp_tool` in order to convert MCP tools into DSPy tools. 26 | 27 | ## Using MCP with DSPy 28 | 29 | ### 1. HTTP Server (Remote) 30 | 31 | For remote MCP servers over HTTP, use the streamable HTTP transport: 32 | 33 | ```python 34 | import asyncio 35 | import dspy 36 | from mcp import ClientSession 37 | from mcp.client.streamable_http import streamablehttp_client 38 | 39 | async def main(): 40 | # Connect to HTTP MCP server (the client yields a read stream, a write stream, and a session-id callback) 41 | async with streamablehttp_client("http://localhost:8000/mcp") as (read, write, _): 42 | async with ClientSession(read, write) as session: 43 | # Initialize the session 44 | await session.initialize() 45 | 46 | # List and convert tools 47 | response = await session.list_tools() 48 | dspy_tools = [ 49 | dspy.Tool.from_mcp_tool(session, tool) 50 | for tool in response.tools 51 | ] 52 | 53 | # Create and use ReAct agent 54 | class TaskSignature(dspy.Signature): 55 | task: str = dspy.InputField() 56 | result: str = dspy.OutputField() 57 | 58 | react_agent = dspy.ReAct( 59 | signature=TaskSignature, 60 | tools=dspy_tools, 61 | max_iters=5 62 | ) 63 | 64 | result = await react_agent.acall(task="Check the weather in Tokyo") 65 | print(result.result) 66 | 67 | asyncio.run(main()) 68 | ``` 69 | 70 | ### 2. 
Stdio Server (Local Process) 71 | 72 | The most common way to use MCP is with a local server process communicating via stdio: 73 | 74 | ```python 75 | import asyncio 76 | import dspy 77 | from mcp import ClientSession, StdioServerParameters 78 | from mcp.client.stdio import stdio_client 79 | 80 | async def main(): 81 | # Configure the stdio server 82 | server_params = StdioServerParameters( 83 | command="python", # Command to run 84 | args=["path/to/your/mcp_server.py"], # Server script path 85 | env=None, # Optional environment variables 86 | ) 87 | 88 | # Connect to the server 89 | async with stdio_client(server_params) as (read, write): 90 | async with ClientSession(read, write) as session: 91 | # Initialize the session 92 | await session.initialize() 93 | 94 | # List available tools 95 | response = await session.list_tools() 96 | 97 | # Convert MCP tools to DSPy tools 98 | dspy_tools = [ 99 | dspy.Tool.from_mcp_tool(session, tool) 100 | for tool in response.tools 101 | ] 102 | 103 | # Create a ReAct agent with the tools 104 | class QuestionAnswer(dspy.Signature): 105 | """Answer questions using available tools.""" 106 | question: str = dspy.InputField() 107 | answer: str = dspy.OutputField() 108 | 109 | react_agent = dspy.ReAct( 110 | signature=QuestionAnswer, 111 | tools=dspy_tools, 112 | max_iters=5 113 | ) 114 | 115 | # Use the agent 116 | result = await react_agent.acall( 117 | question="What is 25 + 17?" 
118 | ) 119 | print(result.answer) 120 | 121 | # Run the async function 122 | asyncio.run(main()) 123 | ``` 124 | 125 | ## Tool Conversion 126 | 127 | DSPy automatically handles the conversion from MCP tools to DSPy tools: 128 | 129 | ```python 130 | # MCP tool from session 131 | mcp_tool = response.tools[0] 132 | 133 | # Convert to DSPy tool 134 | dspy_tool = dspy.Tool.from_mcp_tool(session, mcp_tool) 135 | 136 | # The DSPy tool preserves: 137 | # - Tool name and description 138 | # - Parameter schemas and types 139 | # - Argument descriptions 140 | # - Async execution support 141 | 142 | # Use it like any DSPy tool 143 | result = await dspy_tool.acall(param1="value", param2=123) 144 | ``` 145 | 146 | ## Learn More 147 | 148 | - [MCP Official Documentation](https://modelcontextprotocol.io/) 149 | - [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk) 150 | - [DSPy MCP Tutorial](https://dspy.ai/tutorials/mcp/) 151 | - [DSPy Tools Documentation](./tools.md) 152 | 153 | MCP integration in DSPy makes it easy to use standardized tools from any MCP server, enabling powerful agent capabilities with minimal setup. 
154 | ``` -------------------------------------------------------------------------------- /.github/workflows/run_tests.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Lint, Test, and Build 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | types: [opened, synchronize, reopened] 9 | 10 | jobs: 11 | fix: 12 | name: Check Ruff Fix 13 | runs-on: ubuntu-latest 14 | permissions: 15 | contents: write 16 | pull-requests: write 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-python@v5 20 | with: 21 | python-version: "3.11" 22 | - name: Install uv with caching 23 | uses: astral-sh/setup-uv@v5 24 | with: 25 | enable-cache: true 26 | cache-dependency-glob: | 27 | **/pyproject.toml 28 | **/uv.lock 29 | - name: Create and activate virtual environment 30 | run: | 31 | uv venv .venv 32 | echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH 33 | - name: Install dependencies 34 | run: uv sync --dev -p .venv --extra dev 35 | - name: Ruff Check 36 | run: | 37 | ruff check --fix-only --diff --exit-non-zero-on-fix || ( 38 | echo "" 39 | echo "❌ Ruff found issues that can be fixed automatically." 40 | echo "💡 To fix them locally, run:" 41 | echo "" 42 | echo " pre-commit run --all-files" 43 | echo "" 44 | echo "Then commit and push the changes." 
45 | exit 1 46 | ) 47 | 48 | test: 49 | name: Run Tests 50 | runs-on: ubuntu-latest 51 | strategy: 52 | matrix: 53 | python-version: ["3.10", "3.11", "3.12", "3.13"] 54 | steps: 55 | - uses: actions/checkout@v4 56 | - uses: actions/setup-python@v5 57 | with: 58 | python-version: ${{ matrix.python-version }} 59 | - name: Install Deno 60 | run: | 61 | curl -fsSL https://deno.land/install.sh | sh 62 | echo "${HOME}/.deno/bin" >> $GITHUB_PATH 63 | - name: Verify Deno installation 64 | run: deno --version 65 | - name: Install uv with caching 66 | uses: astral-sh/setup-uv@v5 67 | with: 68 | enable-cache: true 69 | cache-dependency-glob: | 70 | **/pyproject.toml 71 | **/uv.lock 72 | - name: Create and activate virtual environment 73 | run: | 74 | uv venv .venv 75 | echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH 76 | - name: Install dependencies 77 | run: | 78 | uv sync --dev -p .venv --extra dev 79 | uv pip list 80 | - name: Run lint with tests 81 | uses: chartboost/ruff-action@v1 82 | with: 83 | args: check --fix-only 84 | - name: Run tests with pytest 85 | run: uv run -p .venv pytest -vv tests/ 86 | - name: Install optional dependencies 87 | run: uv sync -p .venv --extra dev --extra test_extras 88 | - name: Run extra tests 89 | run: uv run -p .venv pytest tests/ -m extra --extra 90 | 91 | llm_call_test: 92 | name: Run Tests with Real LM 93 | runs-on: ubuntu-latest 94 | services: 95 | ollama: 96 | image: ollama/ollama:latest 97 | ports: 98 | - 11434:11434 99 | steps: 100 | - uses: actions/checkout@v4 101 | - uses: actions/setup-python@v5 102 | with: 103 | python-version: 3.11 104 | - name: Install uv with caching 105 | uses: astral-sh/setup-uv@v5 106 | with: 107 | enable-cache: true 108 | cache-dependency-glob: | 109 | **/pyproject.toml 110 | **/uv.lock 111 | - name: Create and activate virtual environment 112 | run: | 113 | uv venv .venv 114 | echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH 115 | - name: Install dependencies 116 | run: | 117 | uv sync 
--dev -p .venv --extra dev 118 | uv pip list 119 | - name: Pull LLM 120 | run: | 121 | timeout 60 bash -c 'until curl -f http://localhost:11434/api/version; do sleep 2; done' 122 | curl -X POST http://localhost:11434/api/pull \ 123 | -H "Content-Type: application/json" \ 124 | -d '{"name": "llama3.2:3b"}' 125 | echo "LM_FOR_TEST=ollama/llama3.2:3b" >> $GITHUB_ENV 126 | - name: Run tests 127 | run: uv run -p .venv pytest -m llm_call --llm_call -vv --durations=5 tests/ 128 | 129 | build_package: 130 | name: Build Package 131 | runs-on: ubuntu-latest 132 | strategy: 133 | matrix: 134 | python-version: ["3.10", "3.11", "3.12", "3.13"] 135 | steps: 136 | - uses: actions/checkout@v4 137 | - uses: actions/setup-python@v5 138 | with: 139 | python-version: ${{ matrix.python-version }} 140 | - name: Install uv with caching 141 | uses: astral-sh/setup-uv@v5 142 | with: 143 | enable-cache: true 144 | cache-dependency-glob: | 145 | **/pyproject.toml 146 | **/uv.lock 147 | - name: Create and activate virtual environment 148 | run: | 149 | uv venv .venv 150 | echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH 151 | - name: Install dependencies 152 | run: uv sync --dev -p .venv --extra dev 153 | - name: Build 154 | run: uv run -p .venv python -m build 155 | - name: Install built package 156 | run: uv pip install dist/*.whl -p .venv 157 | - name: Test import dspy 158 | run: uv run -p .venv python -c "import dspy" 159 | ```