This is page 1 of 14. Use http://codebase.md/stanfordnlp/dspy?page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── .internal_dspyai │ │ ├── internals │ │ │ ├── build-and-release.md │ │ │ └── release-checklist.md │ │ └── pyproject.toml │ ├── .tmp │ │ └── .generated-actions │ │ └── run-pypi-publish-in-docker-container │ │ └── action.yml │ ├── ISSUE_TEMPLATE │ │ ├── bug_report.yml │ │ └── feature_request.yml │ ├── PULL_REQUEST_TEMPLATE │ │ └── pull_request_template.md │ ├── workflow_scripts │ │ └── install_testpypi_pkg.sh │ └── workflows │ ├── build_and_release.yml │ ├── build_utils │ │ └── test_version.py │ ├── docs-push.yml │ ├── precommits_check.yml │ └── run_tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── docs │ ├── .gitignore │ ├── docs │ │ ├── api │ │ │ ├── adapters │ │ │ │ ├── Adapter.md │ │ │ │ ├── ChatAdapter.md │ │ │ │ ├── JSONAdapter.md │ │ │ │ └── TwoStepAdapter.md │ │ │ ├── evaluation │ │ │ │ ├── answer_exact_match.md │ │ │ │ ├── answer_passage_match.md │ │ │ │ ├── CompleteAndGrounded.md │ │ │ │ ├── Evaluate.md │ │ │ │ ├── EvaluationResult.md │ │ │ │ └── SemanticF1.md │ │ │ ├── experimental │ │ │ │ ├── Citations.md │ │ │ │ └── Document.md │ │ │ ├── index.md │ │ │ ├── models │ │ │ │ ├── Embedder.md │ │ │ │ └── LM.md │ │ │ ├── modules │ │ │ │ ├── BestOfN.md │ │ │ │ ├── ChainOfThought.md │ │ │ │ ├── CodeAct.md │ │ │ │ ├── Module.md │ │ │ │ ├── MultiChainComparison.md │ │ │ │ ├── Parallel.md │ │ │ │ ├── Predict.md │ │ │ │ ├── ProgramOfThought.md │ │ │ │ ├── ReAct.md │ │ │ │ └── Refine.md │ │ │ ├── optimizers │ │ │ │ ├── BetterTogether.md │ │ │ │ ├── BootstrapFewShot.md │ │ │ │ ├── BootstrapFewShotWithRandomSearch.md │ │ │ │ ├── BootstrapFinetune.md │ │ │ │ ├── BootstrapRS.md │ │ │ │ ├── COPRO.md │ │ │ │ ├── Ensemble.md │ │ │ │ ├── GEPA │ │ │ │ │ ├── GEPA_Advanced.md │ │ │ │ │ └── overview.md │ │ │ │ ├── InferRules.md │ │ │ │ ├── KNN.md │ │ │ │ ├── KNNFewShot.md │ │ │ │ ├── LabeledFewShot.md │ │ │ │ ├── 
MIPROv2.md │ │ │ │ └── SIMBA.md │ │ │ ├── primitives │ │ │ │ ├── Audio.md │ │ │ │ ├── Code.md │ │ │ │ ├── Example.md │ │ │ │ ├── History.md │ │ │ │ ├── Image.md │ │ │ │ ├── Prediction.md │ │ │ │ ├── Tool.md │ │ │ │ └── ToolCalls.md │ │ │ ├── signatures │ │ │ │ ├── InputField.md │ │ │ │ ├── OutputField.md │ │ │ │ └── Signature.md │ │ │ ├── tools │ │ │ │ ├── ColBERTv2.md │ │ │ │ ├── Embeddings.md │ │ │ │ └── PythonInterpreter.md │ │ │ └── utils │ │ │ ├── asyncify.md │ │ │ ├── configure_cache.md │ │ │ ├── disable_litellm_logging.md │ │ │ ├── disable_logging.md │ │ │ ├── enable_litellm_logging.md │ │ │ ├── enable_logging.md │ │ │ ├── inspect_history.md │ │ │ ├── load.md │ │ │ ├── StatusMessage.md │ │ │ ├── StatusMessageProvider.md │ │ │ ├── streamify.md │ │ │ └── StreamListener.md │ │ ├── cheatsheet.md │ │ ├── community │ │ │ ├── community-resources.md │ │ │ ├── how-to-contribute.md │ │ │ └── use-cases.md │ │ ├── deep-dive │ │ │ └── data-handling │ │ │ ├── built-in-datasets.md │ │ │ ├── examples.md │ │ │ ├── img │ │ │ │ └── data-loading.png │ │ │ └── loading-custom-data.md │ │ ├── faqs.md │ │ ├── index.md │ │ ├── js │ │ │ └── runllm-widget.js │ │ ├── learn │ │ │ ├── evaluation │ │ │ │ ├── data.md │ │ │ │ ├── metrics.md │ │ │ │ └── overview.md │ │ │ ├── figures │ │ │ │ ├── native_tool_call.png │ │ │ │ └── teleprompter-classes.png │ │ │ ├── index.md │ │ │ ├── optimization │ │ │ │ ├── optimizers.md │ │ │ │ └── overview.md │ │ │ └── programming │ │ │ ├── 7-assertions.md │ │ │ ├── adapters.md │ │ │ ├── language_models.md │ │ │ ├── mcp.md │ │ │ ├── modules.md │ │ │ ├── overview.md │ │ │ ├── signatures.md │ │ │ └── tools.md │ │ ├── production │ │ │ └── index.md │ │ ├── roadmap.md │ │ ├── static │ │ │ ├── .nojekyll │ │ │ └── img │ │ │ ├── dspy_logo.png │ │ │ ├── logo.png │ │ │ ├── mlflow-tracing-rag.png │ │ │ ├── modular.png │ │ │ ├── optimize.png │ │ │ ├── undraw_docusaurus_mountain.svg │ │ │ ├── undraw_docusaurus_react.svg │ │ │ ├── undraw_docusaurus_tree.svg │ │ │ └── 
universal_compatibility.png │ │ ├── stylesheets │ │ │ └── extra.css │ │ └── tutorials │ │ ├── agents │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-agent.png │ │ ├── ai_text_game │ │ │ └── index.md │ │ ├── async │ │ │ └── index.md │ │ ├── audio │ │ │ └── index.ipynb │ │ ├── build_ai_program │ │ │ └── index.md │ │ ├── cache │ │ │ └── index.md │ │ ├── classification │ │ │ └── index.md │ │ ├── classification_finetuning │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-classification.png │ │ ├── conversation_history │ │ │ └── index.md │ │ ├── core_development │ │ │ └── index.md │ │ ├── custom_module │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-custom-module.png │ │ ├── customer_service_agent │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-customer-service-agent.png │ │ ├── deployment │ │ │ ├── dspy_mlflow_ui.png │ │ │ └── index.md │ │ ├── email_extraction │ │ │ ├── index.md │ │ │ └── mlflow-tracing-email-extraction.png │ │ ├── entity_extraction │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-entity-extraction.png │ │ ├── games │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-agent.png │ │ ├── gepa_ai_program │ │ │ └── index.md │ │ ├── gepa_aime │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-aime.png │ │ │ └── mlflow-tracking-gepa-aime-optimization.png │ │ ├── gepa_facilitysupportanalyzer │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-support.png │ │ │ └── mlflow-tracking-gepa-support-optimization.png │ │ ├── gepa_papillon │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-papilon.png │ │ │ └── mlflow-tracking-gepa-papilon-optimization.png │ │ ├── image_generation_prompting │ │ │ └── index.ipynb │ │ ├── index.md │ │ ├── llms_txt_generation │ │ │ └── index.md │ │ ├── math │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-math.png │ │ ├── mcp │ │ │ └── index.md │ │ ├── mem0_react_agent │ │ │ └── index.md │ │ ├── multihop_search │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-multi-hop.png │ │ ├── observability │ │ │ ├── index.md │ │ │ ├── mlflow_trace_ui_navigation.gif 
│ │ │ ├── mlflow_trace_ui.png │ │ │ └── mlflow_trace_view.png │ │ ├── optimize_ai_program │ │ │ └── index.md │ │ ├── optimizer_tracking │ │ │ ├── child_run.png │ │ │ ├── experiment.png │ │ │ ├── index.md │ │ │ └── parent_run.png │ │ ├── output_refinement │ │ │ └── best-of-n-and-refine.md │ │ ├── papillon │ │ │ └── index.md │ │ ├── program_of_thought │ │ │ └── index.ipynb │ │ ├── rag │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-rag.png │ │ ├── real_world_examples │ │ │ └── index.md │ │ ├── rl_ai_program │ │ │ └── index.md │ │ ├── rl_multihop │ │ │ └── index.ipynb │ │ ├── rl_papillon │ │ │ └── index.ipynb │ │ ├── sample_code_generation │ │ │ └── index.md │ │ ├── saving │ │ │ └── index.md │ │ ├── streaming │ │ │ └── index.md │ │ ├── tool_use │ │ │ └── index.ipynb │ │ └── yahoo_finance_react │ │ └── index.md │ ├── mkdocs.yml │ ├── overrides │ │ ├── home.html │ │ ├── main.html │ │ └── partials │ │ └── tabs.html │ ├── Pipfile │ ├── Pipfile.lock │ ├── README.md │ ├── requirements.txt │ ├── scripts │ │ ├── generate_api_docs.py │ │ └── generate_api_summary.py │ └── vercel.json ├── dspy │ ├── __init__.py │ ├── __metadata__.py │ ├── adapters │ │ ├── __init__.py │ │ ├── baml_adapter.py │ │ ├── base.py │ │ ├── chat_adapter.py │ │ ├── json_adapter.py │ │ ├── two_step_adapter.py │ │ ├── types │ │ │ ├── __init__.py │ │ │ ├── audio.py │ │ │ ├── base_type.py │ │ │ ├── citation.py │ │ │ ├── code.py │ │ │ ├── document.py │ │ │ ├── history.py │ │ │ ├── image.py │ │ │ └── tool.py │ │ ├── utils.py │ │ └── xml_adapter.py │ ├── clients │ │ ├── __init__.py │ │ ├── base_lm.py │ │ ├── cache.py │ │ ├── databricks.py │ │ ├── embedding.py │ │ ├── lm_local_arbor.py │ │ ├── lm_local.py │ │ ├── lm.py │ │ ├── openai.py │ │ ├── provider.py │ │ └── utils_finetune.py │ ├── datasets │ │ ├── __init__.py │ │ ├── alfworld │ │ │ ├── __init__.py │ │ │ ├── alfworld.py │ │ │ └── base_config.yml │ │ ├── colors.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── gsm8k.py │ │ ├── hotpotqa.py │ │ └── math.py │ 
├── dsp │ │ ├── __init__.py │ │ ├── colbertv2.py │ │ └── utils │ │ ├── __init__.py │ │ ├── dpr.py │ │ ├── settings.py │ │ └── utils.py │ ├── evaluate │ │ ├── __init__.py │ │ ├── auto_evaluation.py │ │ ├── evaluate.py │ │ └── metrics.py │ ├── experimental │ │ └── __init__.py │ ├── predict │ │ ├── __init__.py │ │ ├── aggregation.py │ │ ├── avatar │ │ │ ├── __init__.py │ │ │ ├── avatar.py │ │ │ ├── models.py │ │ │ └── signatures.py │ │ ├── best_of_n.py │ │ ├── chain_of_thought.py │ │ ├── code_act.py │ │ ├── knn.py │ │ ├── multi_chain_comparison.py │ │ ├── parallel.py │ │ ├── parameter.py │ │ ├── predict.py │ │ ├── program_of_thought.py │ │ ├── react.py │ │ ├── refine.py │ │ └── retry.py │ ├── primitives │ │ ├── __init__.py │ │ ├── base_module.py │ │ ├── example.py │ │ ├── module.py │ │ ├── prediction.py │ │ ├── python_interpreter.py │ │ └── runner.js │ ├── propose │ │ ├── __init__.py │ │ ├── dataset_summary_generator.py │ │ ├── grounded_proposer.py │ │ ├── propose_base.py │ │ └── utils.py │ ├── retrievers │ │ ├── __init__.py │ │ ├── databricks_rm.py │ │ ├── embeddings.py │ │ ├── retrieve.py │ │ └── weaviate_rm.py │ ├── signatures │ │ ├── __init__.py │ │ ├── field.py │ │ ├── signature.py │ │ └── utils.py │ ├── streaming │ │ ├── __init__.py │ │ ├── messages.py │ │ ├── streamify.py │ │ └── streaming_listener.py │ ├── teleprompt │ │ ├── __init__.py │ │ ├── avatar_optimizer.py │ │ ├── bettertogether.py │ │ ├── bootstrap_finetune.py │ │ ├── bootstrap_trace.py │ │ ├── bootstrap.py │ │ ├── copro_optimizer.py │ │ ├── ensemble.py │ │ ├── gepa │ │ │ ├── __init__.py │ │ │ ├── gepa_utils.py │ │ │ ├── gepa.py │ │ │ └── instruction_proposal.py │ │ ├── grpo.py │ │ ├── infer_rules.py │ │ ├── knn_fewshot.py │ │ ├── mipro_optimizer_v2.py │ │ ├── random_search.py │ │ ├── signature_opt.py │ │ ├── simba_utils.py │ │ ├── simba.py │ │ ├── teleprompt_optuna.py │ │ ├── teleprompt.py │ │ ├── utils.py │ │ └── vanilla.py │ └── utils │ ├── __init__.py │ ├── annotation.py │ ├── asyncify.py │ ├── 
caching.py │ ├── callback.py │ ├── dummies.py │ ├── exceptions.py │ ├── hasher.py │ ├── inspect_history.py │ ├── langchain_tool.py │ ├── logging_utils.py │ ├── mcp.py │ ├── parallelizer.py │ ├── saving.py │ ├── syncify.py │ ├── unbatchify.py │ └── usage_tracker.py ├── LICENSE ├── pyproject.toml ├── README.md ├── tests │ ├── __init__.py │ ├── adapters │ │ ├── test_adapter_utils.py │ │ ├── test_baml_adapter.py │ │ ├── test_base_type.py │ │ ├── test_chat_adapter.py │ │ ├── test_citation.py │ │ ├── test_code.py │ │ ├── test_document.py │ │ ├── test_json_adapter.py │ │ ├── test_tool.py │ │ ├── test_two_step_adapter.py │ │ └── test_xml_adapter.py │ ├── callback │ │ └── test_callback.py │ ├── clients │ │ ├── test_cache.py │ │ ├── test_databricks.py │ │ ├── test_embedding.py │ │ ├── test_inspect_global_history.py │ │ └── test_lm.py │ ├── conftest.py │ ├── datasets │ │ └── test_dataset.py │ ├── docs │ │ └── test_mkdocs_links.py │ ├── evaluate │ │ ├── test_evaluate.py │ │ └── test_metrics.py │ ├── examples │ │ └── test_baleen.py │ ├── metadata │ │ └── test_metadata.py │ ├── predict │ │ ├── test_aggregation.py │ │ ├── test_best_of_n.py │ │ ├── test_chain_of_thought.py │ │ ├── test_code_act.py │ │ ├── test_knn.py │ │ ├── test_multi_chain_comparison.py │ │ ├── test_parallel.py │ │ ├── test_predict.py │ │ ├── test_program_of_thought.py │ │ ├── test_react.py │ │ ├── test_refine.py │ │ └── test_retry.py │ ├── primitives │ │ ├── resources │ │ │ └── saved_program.json │ │ ├── test_base_module.py │ │ ├── test_example.py │ │ ├── test_module.py │ │ └── test_python_interpreter.py │ ├── propose │ │ └── test_grounded_proposer.py │ ├── README.md │ ├── reliability │ │ ├── __init__.py │ │ ├── complex_types │ │ │ └── generated │ │ │ ├── test_many_types_1 │ │ │ │ ├── inputs │ │ │ │ │ ├── input1.json │ │ │ │ │ └── input2.json │ │ │ │ ├── program.py │ │ │ │ └── schema.json │ │ │ ├── test_nesting_1 │ │ │ │ ├── inputs │ │ │ │ │ ├── input1.json │ │ │ │ │ └── input2.json │ │ │ │ ├── program.py │ │ │ 
│ └── schema.json │ │ │ └── test_nesting_2 │ │ │ ├── inputs │ │ │ │ └── input1.json │ │ │ ├── program.py │ │ │ └── schema.json │ │ ├── conftest.py │ │ ├── generate │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ └── utils.py │ │ ├── input_formats │ │ │ └── generated │ │ │ └── test_markdown_1 │ │ │ ├── inputs │ │ │ │ ├── input1.json │ │ │ │ └── input2.json │ │ │ ├── program.py │ │ │ └── schema.json │ │ ├── README.md │ │ ├── reliability_conf.yaml │ │ ├── test_generated.py │ │ ├── test_pydantic_models.py │ │ └── utils.py │ ├── retrievers │ │ └── test_embeddings.py │ ├── signatures │ │ ├── test_adapter_image.py │ │ ├── test_custom_types.py │ │ └── test_signature.py │ ├── streaming │ │ └── test_streaming.py │ ├── teleprompt │ │ ├── gepa_dummy_lm_custom_component_selector_custom_instruction_proposer.json │ │ ├── gepa_dummy_lm.json │ │ ├── test_bootstrap_finetune.py │ │ ├── test_bootstrap_trace.py │ │ ├── test_bootstrap.py │ │ ├── test_copro_optimizer.py │ │ ├── test_ensemble.py │ │ ├── test_finetune.py │ │ ├── test_gepa_instruction_proposer.py │ │ ├── test_gepa.py │ │ ├── test_grpo.py │ │ ├── test_knn_fewshot.py │ │ ├── test_random_search.py │ │ ├── test_teleprompt.py │ │ └── test_utils.py │ ├── test_utils │ │ ├── __init__.py │ │ └── server │ │ ├── __init__.py │ │ ├── litellm_server_config.yaml │ │ └── litellm_server.py │ └── utils │ ├── __init__.py │ ├── resources │ │ └── mcp_server.py │ ├── test_annotation.py │ ├── test_asyncify.py │ ├── test_exceptions.py │ ├── test_langchain_tool.py │ ├── test_mcp.py │ ├── test_parallelizer.py │ ├── test_saving.py │ ├── test_settings.py │ ├── test_syncify.py │ ├── test_unbatchify.py │ └── test_usage_tracker.py └── uv.lock ``` # Files -------------------------------------------------------------------------------- /docs/docs/static/.nojekyll: -------------------------------------------------------------------------------- ``` ``` -------------------------------------------------------------------------------- /docs/.gitignore: 
-------------------------------------------------------------------------------- ``` site .cache ``` -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- ```yaml default_language_version: python: python3.10 default_stages: [pre-commit] default_install_hook_types: [pre-commit] repos: - repo: local hooks: - id: ruff-check name: ruff (lint) entry: ruff language: system types_or: [python, pyi] files: ^(dspy|tests)/.*\.py$ exclude: ^(dspy/__metadata__\.py|tests/reliability/.*\.py)$ args: [check, --fix-only] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: check-yaml args: ["--allow-multiple-documents", "--unsafe"] - id: check-toml - id: check-added-large-files args: ["--maxkb=1024"] - id: check-merge-conflict - id: debug-statements ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` /experiments/ /checkpoints/ # /data/ /logs/ /mlruns/ /profiler/ /logs/ /docs/downloads/ /docs/experiments/ /examples/qa/hotpot/MIPRO_notebook_cache/ /examples/nli/scone/MIPRO_notebook_cache/ /examples/nli/scone/ScoNe/ /examples/nli/scone/compiled_program.dspy /examples/qa/hotpot/compiled_program.dspy /ScoNe/ testing/outputs/ testing/playbook.ipynb # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # Vim *.swp # Jupyter Notebook .ipynb_checkpoints # notebooks/ # mac .DS_Store # Other .vscode *.tsv *.pt gpt*.txt *.env local/ local_* build/ *.egg-info/ # *.jsonl # *.json !/data/*.json /dist/ # **/*.pkl checklist.md finetuning_ckpts/ # cache/ * copy* .idea assertion.log *.log *.db /.devcontainer/.personalization.sh .mypy_cache CLAUDE.local.md dummy.csv docs/docs/**/*.json* *.index *.pkl *.tar.gz test_before_pypi/ .github/.internal_dspyai/dist/ ``` 
-------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- ```markdown The tests in this directory are primarily concerned with code correctness and Adapter reliability. If you're looking to test the end-to-end quality of DSPy modules and optimizers, refer to [LangProBe](https://github.com/Shangyint/langProBe). ``` -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- ```markdown **If you're looking to understand the framework, please go to the [DSPy Docs at dspy.ai](https://dspy.ai)** -------- The content below is focused on how to modify the documentation site. # Modifying the DSPy Documentation This website is built using [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/), a Material UI inspired theme for MkDocs. ## Building docs locally To build and test the documentation locally: 1. Navigate to the `docs` directory: ```bash cd docs ``` 2. Install the necessary dependencies: ```bash pip install -r requirements.txt ``` 3. In the `docs/` directory, run the commands below to generate the API docs and index them: ```bash python scripts/generate_api_docs.py python scripts/generate_api_summary.py ``` 4. (Optional) On macOS you may also need to install libraries for building the site: ```bash brew install cairo freetype libffi libjpeg libpng zlib export DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib ``` 5. Run the build command: ```bash mkdocs build ``` This will generate a static build of the documentation site in the `site` directory. You can then serve this directory to view the site locally using: ```bash mkdocs serve ``` If the build fails, make sure to fix it before pushing. 
## Continuous Integration (CI) Build Checks We have automated build checks set up in our CI pipeline to ensure the documentation builds successfully before merging changes. These checks: 1. Run the `mkdocs build` command 2. Verify that the build completes without errors 3. Help catch potential issues early in the development process If the CI build check fails, please review your changes and ensure the documentation builds correctly locally before pushing updates. ## Contributing to the `docs` Folder This guide is for contributors looking to make changes to the documentation in the `dspy/docs` folder. 1. **Pull the up-to-date version of the website**: Please pull the latest version of the live documentation site by cloning the dspy repo. The current docs are in the `dspy/docs` folder. 2. **Push your new changes on a new branch**: Feel free to add or edit existing documentation and open a PR for your changes. Once your PR is reviewed and approved, the changes will be ready to merge into main. 3. **Updating the website**: Once your changes are merged to main, they will be reflected on the live website, usually within 5-15 minutes. ## LLMs.txt The build process generates an `/llms.txt` file for LLM consumption using [mkdocs-llmstxt](https://github.com/pawamoy/mkdocs-llmstxt). Configure sections in `mkdocs.yml` under the `llmstxt` plugin. ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown <p align="center"> <img align="center" src="docs/docs/static/img/dspy_logo.png" width="460px" /> </p> <p align="left"> ## DSPy: _Programming_—not prompting—Foundation Models **Documentation:** [DSPy Docs](https://dspy.ai/) [](https://pepy.tech/projects/dspy) ---- DSPy is the framework for _programming—rather than prompting—language models_. 
It allows you to iterate fast on **building modular AI systems** and offers algorithms for **optimizing their prompts and weights**, whether you're building simple classifiers, sophisticated RAG pipelines, or Agent loops. DSPy stands for Declarative Self-improving Python. Instead of brittle prompts, you write compositional _Python code_ and use DSPy to **teach your LM to deliver high-quality outputs**. Learn more via our [official documentation site](https://dspy.ai/) or meet the community, seek help, or start contributing via this GitHub repo and our [Discord server](https://discord.gg/XCGy2WDCQB). ## Documentation: [dspy.ai](https://dspy.ai) **Please go to the [DSPy Docs at dspy.ai](https://dspy.ai)** ## Installation ```bash pip install dspy ``` To install the very latest from `main`: ```bash pip install git+https://github.com/stanfordnlp/dspy.git ``` ## 📜 Citation & Reading More If you're looking to understand the framework, please go to the [DSPy Docs at dspy.ai](https://dspy.ai). If you're looking to understand the underlying research, here is a set of our papers: **[Jul'25] [GEPA: Reflective Prompt Evolution Can Outperform Reinforcement Learning](https://arxiv.org/abs/2507.19457)** **[Jun'24] [Optimizing Instructions and Demonstrations for Multi-Stage Language Model Programs](https://arxiv.org/abs/2406.11695)** **[Oct'23] [DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines](https://arxiv.org/abs/2310.03714)** [Jul'24] [Fine-Tuning and Prompt Optimization: Two Great Steps that Work Better Together](https://arxiv.org/abs/2407.10930) [Jun'24] [Prompts as Auto-Optimized Training Hyperparameters](https://arxiv.org/abs/2406.11706) [Feb'24] [Assisting in Writing Wikipedia-like Articles From Scratch with Large Language Models](https://arxiv.org/abs/2402.14207) [Jan'24] [In-Context Learning for Extreme Multi-Label Classification](https://arxiv.org/abs/2401.12178) [Dec'23] [DSPy Assertions: Computational Constraints for Self-Refining 
Language Model Pipelines](https://arxiv.org/abs/2312.13382) [Dec'22] [Demonstrate-Search-Predict: Composing Retrieval & Language Models for Knowledge-Intensive NLP](https://arxiv.org/abs/2212.14024.pdf) To stay up to date or learn more, follow [@DSPyOSS](https://twitter.com/DSPyOSS) on Twitter or the DSPy page on LinkedIn. The **DSPy** logo is designed by **Chuyi Zhang**. If you use DSPy or DSP in a research paper, please cite our work as follows: ``` @inproceedings{khattab2024dspy, title={DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines}, author={Khattab, Omar and Singhvi, Arnav and Maheshwari, Paridhi and Zhang, Zhiyuan and Santhanam, Keshav and Vardhamanan, Sri and Haq, Saiful and Sharma, Ashutosh and Joshi, Thomas T. and Moazam, Hanna and Miller, Heather and Zaharia, Matei and Potts, Christopher}, journal={The Twelfth International Conference on Learning Representations}, year={2024} } @article{khattab2022demonstrate, title={Demonstrate-Search-Predict: Composing Retrieval and Language Models for Knowledge-Intensive {NLP}}, author={Khattab, Omar and Santhanam, Keshav and Li, Xiang Lisa and Hall, David and Liang, Percy and Potts, Christopher and Zaharia, Matei}, journal={arXiv preprint arXiv:2212.14024}, year={2022} } ``` <!-- You can also read more about the evolution of the framework from Demonstrate-Search-Predict to DSPy: * [**DSPy Assertions: Computational Constraints for Self-Refining Language Model Pipelines**](https://arxiv.org/abs/2312.13382) (Academic Paper, Dec 2023) * [**DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines**](https://arxiv.org/abs/2310.03714) (Academic Paper, Oct 2023) * [**Releasing DSPy, the latest iteration of the framework**](https://twitter.com/lateinteraction/status/1694748401374490946) (Twitter Thread, Aug 2023) * [**Releasing the DSP Compiler (v0.1)**](https://twitter.com/lateinteraction/status/1625231662849073160) (Twitter Thread, Feb 2023) * [**Introducing 
DSP**](https://twitter.com/lateinteraction/status/1617953413576425472) (Twitter Thread, Jan 2023) * [**Demonstrate-Search-Predict: Composing retrieval and language models for knowledge-intensive NLP**](https://arxiv.org/abs/2212.14024.pdf) (Academic Paper, Dec 2022) --> ``` -------------------------------------------------------------------------------- /tests/reliability/README.md: -------------------------------------------------------------------------------- ```markdown # DSPy Reliability Tests This directory contains reliability tests for DSPy programs. The purpose of these tests is to verify that DSPy programs reliably produce expected outputs across multiple large language models (LLMs), regardless of model size or capability. These tests are designed to ensure that DSPy programs maintain robustness and accuracy across diverse LLM configurations. ### Overview Each test in this directory executes a DSPy program using various LLMs. By running the same tests across different models, these tests help validate that DSPy programs handle a wide range of inputs effectively and produce reliable outputs, even in cases where the model might struggle with the input or task. ### Key Features - **Diverse LLMs**: Each DSPy program is tested with multiple LLMs, ranging from smaller models to more advanced, high-performance models. This approach allows us to assess the consistency and generality of DSPy program outputs across different model capabilities. - **Challenging and Adversarial Tests**: Some of the tests are intentionally challenging or adversarial, crafted to push the boundaries of DSPy. These challenging cases allow us to gauge the robustness of DSPy and identify areas for potential improvement. - **Cross-Model Compatibility**: By testing with different LLMs, we aim to ensure that DSPy programs perform well across model types and configurations, reducing model-specific edge cases and enhancing program versatility. 
### Running the Tests - First, populate the configuration file `reliability_conf.yaml` (located in this directory) with the necessary LiteLLM model/provider names and access credentials for (1) each LLM you want to test and (2) the LLM judge that you want to use for assessing the correctness of outputs in certain test cases. These should be placed in the `litellm_params` section for each model in the defined `model_list`. You can also use `litellm_params` to specify values for LLM hyperparameters like `temperature`. Any model that lacks configured `litellm_params` in the configuration file will be ignored during testing. The configuration must also specify a DSPy adapter to use when testing, e.g. `"chat"` (for `dspy.ChatAdapter`) or `"json"` (for `dspy.JSONAdapter`). An example of `reliability_conf.yaml`: ```yaml adapter: chat model_list: # The model to use for judging the correctness of program # outputs throughout reliability test suites. We recommend using # a high quality model as the judge, such as OpenAI GPT-4o - model_name: "judge" litellm_params: model: "openai/gpt-4o" api_key: "<my_openai_api_key>" - model_name: "gpt-4o" litellm_params: model: "openai/gpt-4o" api_key: "<my_openai_api_key>" - model_name: "claude-3.5-sonnet" litellm_params: model: "anthropic/claude-3.5" api_key: "<my_anthropic_api_key>" ``` - Second, to run the tests, run the following command from this directory: ```bash pytest . ``` This will execute all tests for the configured models and display detailed results for each model configuration. Tests are set up to mark expected failures for known challenging cases where a specific model might struggle, while actual (unexpected) DSPy reliability issues are flagged as failures (see below). #### Running specific generated tests You can run specific generated tests by using the `-k` flag with `pytest`. 
For example, to test the generated program located at `tests/reliability/complex_types/generated/test_nesting_1` against generated test input `input1.json`, you can run the following command from this directory: ```bash pytest test_generated.py -k "test_nesting_1-input1" ``` ### Test generation You can generate test DSPy programs and test inputs from text descriptions using the `tests.reliability.generate` CLI, or the `tests.reliability.generate.generate_test_cases` API. For example, to generate a test classification program and 3 challenging test inputs in the `tests/reliability/classification/generated` directory, you can run the following command from the DSPy repository root directory: ```bash python \ -m tests.reliability.generate \ -d tests/reliability/classification/generated/test_example \ -p "Generate a program that performs a classification task involving objects with multiple properties. The task should be realistic" \ -i "Based on the program description, generate a challenging example" \ -n 3 ``` The test program will be written to `tests/reliability/classification/generated/test_example/program.py`, and the test inputs will be written as JSON files to the `tests/reliability/classification/generated/test_example/inputs/` directory. All generated tests should be located in directories with the structure `tests/reliability/<test_type>/generated/<test_name>`, where `<test_type>` is the type of test (e.g., `classification`, `complex_types`, `chat`, etc.), and `<test_name>` is a descriptive name for the test. ### Known Failing Models Some tests may be expected to fail with certain models, especially in challenging cases. These known failures are logged but do not affect the overall test result. This setup allows us to keep track of model-specific limitations without obstructing general test outcomes. Models that are known to fail a particular test case are specified using the `@known_failing_models` decorator. 
For example: ``` @known_failing_models(["llama-3.2-3b-instruct"]) def test_program_with_complex_deeply_nested_output_structure(): ... ``` ``` -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- ```markdown # Contribution Guide DSPy is an actively growing project and community! We welcome your contributions and involvement. Below are instructions for how to contribute to DSPy. ## Finding an Issue The fastest way to contribute is to find open issues that need an assignee. We maintain two lists of GitHub tags for contributors: - [good first issue](https://github.com/stanfordnlp/dspy/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22): a list of small, well-defined issues for newcomers to the project. - [help wanted](https://github.com/stanfordnlp/dspy/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22help%20wanted%22): a list of issues that welcome community contributions. These issues have a wide range of complexity. We also welcome new ideas! If you would like to propose a new feature, please open a feature request to discuss. If you already have a design in mind, please include a notebook/code example to demonstrate your idea. Keep in mind that designing a new feature or use case may take longer than contributing to an open issue. ## Contributing Code Follow these steps to submit your code contribution. ### Step 1. Open an Issue Before making any changes, we recommend opening an issue (if one doesn't already exist) and discussing your proposed changes. This way, we can give you feedback and validate the proposed changes. If your code change involves fixing a bug, please include a code snippet or notebook to show how to reproduce the broken behavior. For minor changes (simple bug fixes or documentation fixes), feel free to open a PR without discussion. ### Step 2. 
Make Code Changes To make code changes, fork the repository and set up your local development environment following the instructions in the "Environment Setup" section below. ### Step 3. Commit Your Code and Run Autoformatting We follow the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) and use `ruff` for both linting and formatting. To ensure consistent code quality, we use pre-commit hooks that automatically check and fix common issues. First you need to set up the pre-commit hooks (do this once after cloning the repository): ```shell pre-commit install ``` Then stage and commit your changes. When you run `git commit`, the pre-commit hooks will run automatically. ```shell git add . git commit -m "your commit message" ``` If the hooks make any changes, you'll need to stage and commit those changes as well. You can also run the hooks manually: - Check staged files only: ```shell pre-commit run ``` - Check specific files: ```shell pre-commit run --files path/to/file1.py path/to/file2.py ``` Please ensure all pre-commit checks pass before creating your pull request. If you're unsure about any formatting issues, feel free to commit your changes and let the pre-commit hooks fix them automatically. ### Step 4. Create a Pull Request Once your changes are ready, open a pull request from your branch in your fork to the main branch in the [DSPy repo](https://github.com/stanfordnlp/dspy). ### Step 5. Code Review Once your PR is up and passes all CI tests, we will assign reviewers to review the code. There may be several rounds of comments and code changes before the pull request gets approved by the reviewer. ### Step 6. Merging Once the pull request is approved, a team member will take care of merging. ## Environment Setup Python 3.10 or later is required. Setting up your DSPy development environment requires you to fork the DSPy repository and clone it locally. 
If you are not familiar with the GitHub fork process, please refer to [Fork a repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo). After creating the fork, clone it to your local development device: ```shell git clone {url-to-your-fork} cd dspy ``` Next, we must set up a Python environment with the correct dependencies. There are two recommended ways to set up the dev environment. ### [Recommended] Set Up Environment Using uv [uv](https://github.com/astral-sh/uv) is a Rust-based Python package and project manager that provides a fast way to set up the development environment. First, install uv by following the [installation guide](https://docs.astral.sh/uv/getting-started/installation/). After uv is installed, in your working directory (`dspy/`), run: ```shell uv sync --extra dev ``` Then you are all set! To verify that your environment is set up successfully, run some unit tests: ```shell uv run pytest tests/predict ``` Note: You need to use the `uv run` prefix for every Python command, as uv creates a Python virtual environment and `uv run` points the command to that environment. For example, to execute a Python script you will need `uv run python script.py`. ### Set Up Environment Using conda + pip You can also set up the virtual environment via conda + pip, which takes a few extra steps but offers more flexibility. Before starting, make sure you have conda installed. If not, please follow the instructions [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html).
To set up the environment, run: ```shell conda create -n dspy-dev python=3.11 conda activate dspy-dev pip install -e ".[dev]" ``` Then verify the installation by running some unit tests: ```shell pytest tests/predict ``` ``` -------------------------------------------------------------------------------- /dspy/dsp/__init__.py: -------------------------------------------------------------------------------- ```python ``` -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- ```python ``` -------------------------------------------------------------------------------- /tests/reliability/__init__.py: -------------------------------------------------------------------------------- ```python ``` -------------------------------------------------------------------------------- /tests/test_utils/__init__.py: -------------------------------------------------------------------------------- ```python ``` -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- ```python ``` -------------------------------------------------------------------------------- /tests/teleprompt/test_finetune.py: -------------------------------------------------------------------------------- ```python # TODO ``` -------------------------------------------------------------------------------- /dspy/predict/parameter.py: -------------------------------------------------------------------------------- ```python class Parameter: pass ``` -------------------------------------------------------------------------------- /dspy/datasets/alfworld/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.datasets.alfworld.alfworld import AlfWorld ``` 
-------------------------------------------------------------------------------- /dspy/teleprompt/gepa/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.teleprompt.gepa.gepa import GEPA __all__ = ["GEPA"] ``` -------------------------------------------------------------------------------- /dspy/propose/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.propose.grounded_proposer import GroundedProposer __all__ = [ "GroundedProposer", ] ``` -------------------------------------------------------------------------------- /dspy/dsp/utils/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.dsp.utils.dpr import * from dspy.dsp.utils.settings import * from dspy.dsp.utils.utils import * ``` -------------------------------------------------------------------------------- /dspy/predict/avatar/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.predict.avatar.avatar import * from dspy.predict.avatar.models import * from dspy.predict.avatar.signatures import * ``` -------------------------------------------------------------------------------- /dspy/retrievers/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.retrievers.embeddings import Embeddings from dspy.retrievers.retrieve import Retrieve __all__ = ["Embeddings", "Retrieve"] ``` -------------------------------------------------------------------------------- /dspy/experimental/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.adapters.types.citation import Citations from dspy.adapters.types.document import Document __all__ = [ "Citations", "Document", ] ``` -------------------------------------------------------------------------------- 
/docs/docs/api/index.md: -------------------------------------------------------------------------------- ```markdown # API Reference Welcome to the DSPy API reference documentation. This section provides detailed information about DSPy's classes, modules, and functions. ``` -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- ``` git+https://github.com/stanfordnlp/dspy.git mkdocs-material mkdocs-jupyter mkdocs-material[imaging] mkdocs-redirects mkdocstrings mkdocstrings-python mkdocs-llmstxt>=0.3.0 urllib3==1.26.6 mistune==3.0.2 ``` -------------------------------------------------------------------------------- /docs/vercel.json: -------------------------------------------------------------------------------- ```json { "trailingSlash": true, "headers": [ { "source": "/(.*).md", "headers": [ { "key": "Content-Type", "value": "text/markdown; charset=utf-8" } ] } ] } ``` -------------------------------------------------------------------------------- /dspy/__metadata__.py: -------------------------------------------------------------------------------- ```python #replace_package_name_marker __name__="dspy" #replace_package_version_marker __version__="3.0.4b1" __description__="DSPy" __url__="https://github.com/stanfordnlp/dspy" __author__="Omar Khattab" __author_email__="[email protected]" ``` -------------------------------------------------------------------------------- /dspy/propose/propose_base.py: -------------------------------------------------------------------------------- ```python from abc import ABC, abstractmethod class Proposer(ABC): def __init__(self): pass @abstractmethod def propose_instructions_for_program(self): pass def propose_instruction_for_predictor(self): pass ``` -------------------------------------------------------------------------------- /docs/docs/community/how-to-contribute.md: 
-------------------------------------------------------------------------------- ```markdown # Contributing DSPy is an actively growing project and community, and we welcome your contributions and involvement! Please read the [contributing guide](https://github.com/stanfordnlp/dspy/blob/main/CONTRIBUTING.md) for how to contribute to DSPy. ``` -------------------------------------------------------------------------------- /docs/docs/tutorials/classification/index.md: -------------------------------------------------------------------------------- ```markdown Please refer to [this tutorial from Drew Breunig](https://www.dbreunig.com/2024/12/12/pipelines-prompt-optimization-with-dspy.html) using DSPy. This tutorial demonstrates a few aspects of using DSPy in a highly-accessible, concrete context for categorizing historic events with a tiny LM. ``` -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- ```html {% extends "base.html" %} {% block extrahead %} <script async src="https://www.googletagmanager.com/gtag/js?id=G-G728W2L8KQ"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'G-G728W2L8KQ'); </script> {% endblock %} ``` -------------------------------------------------------------------------------- /dspy/signatures/utils.py: -------------------------------------------------------------------------------- ```python from typing import Literal from pydantic.fields import FieldInfo def get_dspy_field_type(field: FieldInfo) -> Literal["input", "output"]: field_type = field.json_schema_extra.get("__dspy_field_type") if field_type is None: raise ValueError(f"Field {field} does not have a __dspy_field_type") return field_type ``` -------------------------------------------------------------------------------- 
/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- ```markdown ## 📝 Changes Description This MR/PR contains the following changes: ... ## ✅ Contributor Checklist - [ ] Pre-Commit checks are passing (locally and remotely) - [ ] Title of your PR / MR corresponds to the required format - [ ] Commit message follows required format {label}(dspy): {message} ## ⚠️ Warnings Anything we should be aware of? ``` -------------------------------------------------------------------------------- /dspy/datasets/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.datasets.alfworld import AlfWorld from dspy.datasets.colors import Colors from dspy.datasets.dataloader import DataLoader from dspy.datasets.dataset import Dataset from dspy.datasets.hotpotqa import HotPotQA from dspy.datasets.math import MATH __all__ = [ "Colors", "DataLoader", "Dataset", "HotPotQA", "MATH", ] ``` -------------------------------------------------------------------------------- /tests/metadata/test_metadata.py: -------------------------------------------------------------------------------- ```python import re import dspy def test_metadata(): assert dspy.__name__ == "dspy" assert re.match(r"\d+\.\d+\.\d+", dspy.__version__) assert dspy.__author__ == "Omar Khattab" assert dspy.__author_email__ == "[email protected]" assert dspy.__url__ == "https://github.com/stanfordnlp/dspy" assert dspy.__description__ == "DSPy" ``` -------------------------------------------------------------------------------- /dspy/adapters/types/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.adapters.types.audio import Audio from dspy.adapters.types.base_type import Type from dspy.adapters.types.code import Code from dspy.adapters.types.history import History from dspy.adapters.types.image import Image from
dspy.adapters.types.tool import Tool, ToolCalls __all__ = ["History", "Image", "Audio", "Type", "Tool", "ToolCalls", "Code"] ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/load.md: -------------------------------------------------------------------------------- ```markdown # dspy.load <!-- START_API_REF --> ::: dspy.load handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/primitives/History.md: -------------------------------------------------------------------------------- ```markdown # dspy.History <!-- START_API_REF --> ::: dspy.History handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/asyncify.md: -------------------------------------------------------------------------------- ```markdown # dspy.asyncify <!-- START_API_REF --> ::: dspy.asyncify handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/streamify.md: -------------------------------------------------------------------------------- ```markdown # dspy.streamify <!-- START_API_REF --> ::: dspy.streamify handler: python options: show_source: true show_root_heading: true heading_level: 2 
docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/signatures/InputField.md: -------------------------------------------------------------------------------- ```markdown # dspy.InputField <!-- START_API_REF --> ::: dspy.InputField handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/signatures/OutputField.md: -------------------------------------------------------------------------------- ```markdown # dspy.OutputField <!-- START_API_REF --> ::: dspy.OutputField handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/enable_logging.md: -------------------------------------------------------------------------------- ```markdown # dspy.enable_logging <!-- START_API_REF --> ::: dspy.enable_logging handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/configure_cache.md: -------------------------------------------------------------------------------- ```markdown # dspy.configure_cache <!-- START_API_REF --> ::: 
dspy.configure_cache handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/disable_logging.md: -------------------------------------------------------------------------------- ```markdown # dspy.disable_logging <!-- START_API_REF --> ::: dspy.disable_logging handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/inspect_history.md: -------------------------------------------------------------------------------- ```markdown # dspy.inspect_history <!-- START_API_REF --> ::: dspy.inspect_history handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /dspy/primitives/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.primitives.base_module import BaseModule from dspy.primitives.example import Example from dspy.primitives.module import Module from dspy.primitives.prediction import Completions, Prediction from dspy.primitives.python_interpreter import PythonInterpreter __all__ = [ "Example", "BaseModule", "Prediction", "Completions", "Module", "PythonInterpreter", ] ``` -------------------------------------------------------------------------------- 
/.github/workflow_scripts/install_testpypi_pkg.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash # The $1 argument is the version number passed from the workflow VERSION=$1 echo "version: $VERSION" for i in {1..5}; do if python3 -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple dspy-ai-test=="$VERSION"; then break else echo "Attempt $i failed. Waiting before retrying..." sleep 10 fi done ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/enable_litellm_logging.md: -------------------------------------------------------------------------------- ```markdown # dspy.enable_litellm_logging <!-- START_API_REF --> ::: dspy.enable_litellm_logging handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /tests/test_utils/server/litellm_server_config.yaml: -------------------------------------------------------------------------------- ```yaml model_list: - model_name: "dspy-test-model" litellm_params: model: "dspy-test-provider/dspy-test-model" - model_name: "dspy-test-model-2" litellm_params: model: "dspy-test-provider/dspy-test-model" litellm_settings: num_retries: 0 custom_provider_map: - { "provider": "dspy-test-provider", "custom_handler": litellm_server.dspy_test_model, } ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/KNN.md: -------------------------------------------------------------------------------- ```markdown # dspy.KNN <!-- START_API_REF --> ::: dspy.KNN handler: python options: members: - __call__ show_source: true show_root_heading: true heading_level: 2 docstring_style: google 
show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/disable_litellm_logging.md: -------------------------------------------------------------------------------- ```markdown # dspy.disable_litellm_logging <!-- START_API_REF --> ::: dspy.disable_litellm_logging handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/StatusMessage.md: -------------------------------------------------------------------------------- ```markdown # dspy.streaming.StatusMessage <!-- START_API_REF --> ::: dspy.streaming.StatusMessage handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/evaluation/answer_exact_match.md: -------------------------------------------------------------------------------- ```markdown # dspy.evaluate.answer_exact_match <!-- START_API_REF --> ::: dspy.evaluate.answer_exact_match handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/evaluation/Evaluate.md: -------------------------------------------------------------------------------- 
```markdown # dspy.Evaluate <!-- START_API_REF --> ::: dspy.Evaluate handler: python options: members: - __call__ show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /tests/reliability/test_generated.py: -------------------------------------------------------------------------------- ```python import os import pytest from tests.reliability.generate.utils import load_generated_cases, run_generated_case _DIR_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__))) @pytest.mark.reliability @pytest.mark.parametrize( "generated_case", load_generated_cases(_DIR_PATH), ids=lambda case: case.name, ) def test_generated_cases(generated_case): run_generated_case(generated_case) ``` -------------------------------------------------------------------------------- /docs/docs/api/evaluation/answer_passage_match.md: -------------------------------------------------------------------------------- ```markdown # dspy.evaluate.answer_passage_match <!-- START_API_REF --> ::: dspy.evaluate.answer_passage_match handler: python options: show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/tools/ColBERTv2.md: -------------------------------------------------------------------------------- ```markdown # dspy.ColBERTv2 <!-- START_API_REF --> ::: dspy.ColBERTv2 handler: python options: members: - __call__ show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true 
::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /dspy/evaluate/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.evaluate.auto_evaluation import CompleteAndGrounded, SemanticF1 from dspy.evaluate.evaluate import Evaluate, EvaluationResult from dspy.evaluate.metrics import EM, answer_exact_match, answer_passage_match, normalize_text __all__ = [ "EM", "normalize_text", "answer_exact_match", "answer_passage_match", "Evaluate", "SemanticF1", "CompleteAndGrounded", "EvaluationResult", ] ``` -------------------------------------------------------------------------------- /dspy/streaming/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.streaming.messages import StatusMessage, StatusMessageProvider, StreamResponse from dspy.streaming.streamify import apply_sync_streaming, streamify, streaming_response from dspy.streaming.streaming_listener import StreamListener __all__ = [ "StatusMessage", "StatusMessageProvider", "streamify", "StreamListener", "StreamResponse", "streaming_response", "apply_sync_streaming", ] ``` -------------------------------------------------------------------------------- /dspy/utils/caching.py: -------------------------------------------------------------------------------- ```python import os from pathlib import Path _DEFAULT_CACHE_DIR = os.path.join(Path.home(), ".dspy_cache") DSPY_CACHEDIR = os.environ.get("DSPY_CACHEDIR") or _DEFAULT_CACHE_DIR def create_subdir_in_cachedir(subdir: str) -> str: """Create a subdirectory in the DSPy cache directory.""" subdir = os.path.join(DSPY_CACHEDIR, subdir) subdir = os.path.abspath(subdir) os.makedirs(subdir, exist_ok=True) return subdir ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/COPRO.md: 
-------------------------------------------------------------------------------- ```markdown # dspy.COPRO <!-- START_API_REF --> ::: dspy.COPRO handler: python options: members: - compile - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/models/Embedder.md: -------------------------------------------------------------------------------- ```markdown # dspy.Embedder <!-- START_API_REF --> ::: dspy.Embedder handler: python options: members: - __call__ - acall show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/Parallel.md: -------------------------------------------------------------------------------- ```markdown # dspy.Parallel <!-- START_API_REF --> ::: dspy.Parallel handler: python options: members: - __call__ - forward show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/Ensemble.md: -------------------------------------------------------------------------------- ```markdown # dspy.Ensemble <!-- START_API_REF --> ::: dspy.Ensemble handler: python options: members: - compile - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false 
inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/KNNFewShot.md: -------------------------------------------------------------------------------- ```markdown # dspy.KNNFewShot <!-- START_API_REF --> ::: dspy.KNNFewShot handler: python options: members: - compile - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/BootstrapRS.md: -------------------------------------------------------------------------------- ```markdown # dspy.BootstrapRS <!-- START_API_REF --> ::: dspy.BootstrapRS handler: python options: members: - compile - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/BetterTogether.md: -------------------------------------------------------------------------------- ```markdown # dspy.BetterTogether <!-- START_API_REF --> ::: dspy.BetterTogether handler: python options: members: - compile - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/LabeledFewShot.md: -------------------------------------------------------------------------------- ```markdown # dspy.LabeledFewShot <!-- START_API_REF --> ::: 
dspy.LabeledFewShot handler: python options: members: - compile - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/tools/Embeddings.md: -------------------------------------------------------------------------------- ```markdown # dspy.retrievers.Embeddings <!-- START_API_REF --> ::: dspy.Embeddings handler: python options: members: - __call__ - forward show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/BootstrapFewShot.md: -------------------------------------------------------------------------------- ```markdown # dspy.BootstrapFewShot <!-- START_API_REF --> ::: dspy.BootstrapFewShot handler: python options: members: - compile - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /dspy/signatures/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.signatures.field import InputField, OldField, OldInputField, OldOutputField, OutputField from dspy.signatures.signature import ( Signature, SignatureMeta, ensure_signature, infer_prefix, make_signature, ) __all__ = [ "InputField", "OutputField", "OldField", "OldInputField", "OldOutputField", "SignatureMeta", "Signature", "infer_prefix", "ensure_signature", 
"make_signature", ] ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/StreamListener.md: -------------------------------------------------------------------------------- ```markdown # dspy.streaming.StreamListener <!-- START_API_REF --> ::: dspy.streaming.StreamListener handler: python options: members: - flush - receive show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/tools/PythonInterpreter.md: -------------------------------------------------------------------------------- ```markdown # dspy.PythonInterpreter <!-- START_API_REF --> ::: dspy.PythonInterpreter handler: python options: members: - __call__ - execute - shutdown show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/BootstrapFewShotWithRandomSearch.md: -------------------------------------------------------------------------------- ```markdown # dspy.BootstrapFewShotWithRandomSearch <!-- START_API_REF --> ::: dspy.BootstrapFewShotWithRandomSearch handler: python options: members: - compile - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/BootstrapFinetune.md: -------------------------------------------------------------------------------- 
```markdown # dspy.BootstrapFinetune <!-- START_API_REF --> ::: dspy.BootstrapFinetune handler: python options: members: - compile - convert_to_lm_dict - finetune_lms - get_params show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /tests/teleprompt/test_teleprompt.py: -------------------------------------------------------------------------------- ```python from dspy.teleprompt.teleprompt import Teleprompter class DummyTeleprompter(Teleprompter): def __init__(self, param1: int, param2: str): super().__init__() self.param1 = param1 self.param2 = param2 def compile(self, student, *, trainset, teacher=None, valset=None, **kwargs): return student def test_get_params(): teleprompter = DummyTeleprompter(param1=1, param2="test") params = teleprompter.get_params() assert params == {"param1": 1, "param2": "test"} ``` -------------------------------------------------------------------------------- /dspy/adapters/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.adapters.base import Adapter from dspy.adapters.chat_adapter import ChatAdapter from dspy.adapters.json_adapter import JSONAdapter from dspy.adapters.two_step_adapter import TwoStepAdapter from dspy.adapters.types import Audio, Code, History, Image, Tool, ToolCalls, Type from dspy.adapters.xml_adapter import XMLAdapter __all__ = [ "Adapter", "ChatAdapter", "Type", "History", "Image", "Audio", "Code", "JSONAdapter", "XMLAdapter", "TwoStepAdapter", "Tool", "ToolCalls", ] ``` -------------------------------------------------------------------------------- /docs/docs/tutorials/papillon/index.md: -------------------------------------------------------------------------------- ```markdown Please refer to [this tutorial 
from the PAPILLON authors](https://colab.research.google.com/github/Columbia-NLP-Lab/PAPILLON/blob/main/papillon_tutorial.ipynb) using DSPy. This tutorial demonstrates a few aspects of using DSPy in a more advanced context: 1. It builds a multi-stage `dspy.Module` that involves a small local LM using an external tool. 2. It builds a multi-stage _judge_ in DSPy, and uses it as a metric for evaluation. 3. It uses this judge for optimizing the `dspy.Module`, using a large model as a teacher for a small local LM. ``` -------------------------------------------------------------------------------- /docs/docs/api/primitives/Example.md: -------------------------------------------------------------------------------- ```markdown # dspy.Example <!-- START_API_REF --> ::: dspy.Example handler: python options: members: - copy - get - inputs - items - keys - labels - toDict - values - with_inputs - without show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/signatures/Signature.md: -------------------------------------------------------------------------------- ```markdown # dspy.Signature <!-- START_API_REF --> ::: dspy.Signature handler: python options: members: - append - delete - dump_state - equals - insert - load_state - prepend - with_instructions - with_updated_fields show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/InferRules.md: -------------------------------------------------------------------------------- ```markdown # dspy.InferRules 
<!-- START_API_REF --> ::: dspy.InferRules handler: python options: members: - compile - evaluate_program - format_examples - get_params - get_predictor_demos - induce_natural_language_rules - update_program_instructions show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /.github/.internal_dspyai/pyproject.toml: -------------------------------------------------------------------------------- ```toml [project] #replace_package_name_marker name="dspy-ai" #replace_package_version_marker version="3.0.4b1" description = "DSPy" readme = "README.md" authors = [ { name = "Omar Khattab", email = "[email protected]" } ] license = { text = "MIT License" } requires-python = ">=3.9" #replace_dspy_version_marker dependencies = ["dspy>=3.0.4b1"] urls = { "Homepage" = "https://github.com/stanfordnlp/dsp" } [build-system] requires = ["setuptools>=40.8.0", "wheel"] build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] include = ["dsp.*", "dspy.*", "dsp", "dspy"] ``` -------------------------------------------------------------------------------- /docs/docs/api/primitives/Code.md: -------------------------------------------------------------------------------- ```markdown # dspy.Code <!-- START_API_REF --> ::: dspy.Code handler: python options: members: - description - extract_custom_type_from_annotation - format - is_streamable - parse_lm_response - parse_stream_chunk - serialize_model - validate_input show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- 
/dspy/predict/avatar/models.py: -------------------------------------------------------------------------------- ```python from typing import Any from pydantic import BaseModel, Field class Tool(BaseModel): tool: Any name: str desc: str | None input_type: str | None = None def __str__(self) -> str: return f"{self.name}{f'(valid_input: {self.input_type})' if self.input_type else ''}: {self.desc}" def __repr__(self) -> str: return self.__str__() class Action(BaseModel): tool_name: Any = Field(..., description="Name of the tool to use.") tool_input_query: Any = Field(..., description="Query to pass as input to the tool.") class ActionOutput(BaseModel): tool_name: str tool_input_query: str tool_output: str ``` -------------------------------------------------------------------------------- /docs/docs/api/experimental/Document.md: -------------------------------------------------------------------------------- ```markdown # dspy.experimental.Document <!-- START_API_REF --> ::: dspy.experimental.Document handler: python options: members: - description - extract_custom_type_from_annotation - format - is_streamable - parse_lm_response - parse_stream_chunk - serialize_model - validate_input show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/primitives/ToolCalls.md: -------------------------------------------------------------------------------- ```markdown # dspy.ToolCalls <!-- START_API_REF --> ::: dspy.ToolCalls handler: python options: members: - description - extract_custom_type_from_annotation - format - from_dict_list - is_streamable - parse_lm_response - parse_stream_chunk - serialize_model - validate_input show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: 
true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/utils/StatusMessageProvider.md: -------------------------------------------------------------------------------- ```markdown # dspy.streaming.StatusMessageProvider <!-- START_API_REF --> ::: dspy.streaming.StatusMessageProvider handler: python options: members: - lm_end_status_message - lm_start_status_message - module_end_status_message - module_start_status_message - tool_end_status_message - tool_start_status_message show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/primitives/Image.md: -------------------------------------------------------------------------------- ```markdown # dspy.Image <!-- START_API_REF --> ::: dspy.Image handler: python options: members: - description - extract_custom_type_from_annotation - format - from_PIL - from_file - from_url - is_streamable - parse_lm_response - parse_stream_chunk - serialize_model show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/models/LM.md: -------------------------------------------------------------------------------- ```markdown # dspy.LM <!-- START_API_REF --> ::: dspy.LM handler: python options: members: - __call__ - acall - aforward - copy - dump_state - finetune - forward - infer_provider - inspect_history - kill - launch - reinforce - update_history show_source: true 
show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/primitives/Prediction.md: -------------------------------------------------------------------------------- ```markdown # dspy.Prediction <!-- START_API_REF --> ::: dspy.Prediction handler: python options: members: - copy - from_completions - get - get_lm_usage - inputs - items - keys - labels - set_lm_usage - toDict - values - with_inputs - without show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/experimental/Citations.md: -------------------------------------------------------------------------------- ```markdown # dspy.experimental.Citations <!-- START_API_REF --> ::: dspy.experimental.Citations handler: python options: members: - description - extract_custom_type_from_annotation - format - from_dict_list - is_streamable - parse_lm_response - parse_stream_chunk - serialize_model - validate_input show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/primitives/Audio.md: -------------------------------------------------------------------------------- ```markdown # dspy.Audio <!-- START_API_REF --> ::: dspy.Audio handler: python options: members: - description - extract_custom_type_from_annotation - format - from_array - from_file - from_url - is_streamable - 
parse_lm_response - parse_stream_chunk - serialize_model - validate_input show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/evaluation/EvaluationResult.md: -------------------------------------------------------------------------------- ```markdown # dspy.evaluate.EvaluationResult <!-- START_API_REF --> ::: dspy.evaluate.EvaluationResult handler: python options: members: - copy - from_completions - get - get_lm_usage - inputs - items - keys - labels - set_lm_usage - toDict - values - with_inputs - without show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/js/runllm-widget.js: -------------------------------------------------------------------------------- ```javascript document.addEventListener("DOMContentLoaded", function () { var script = document.createElement("script"); script.defer = true; script.type = "module"; script.id = "runllm-widget-script"; script.src = "https://widget.runllm.com"; script.setAttribute("runllm-name", "DSPy"); script.setAttribute("runllm-preset", "mkdocs"); script.setAttribute("runllm-server-address", "https://api.runllm.com"); script.setAttribute("runllm-assistant-id", "132"); script.setAttribute("runllm-position", "BOTTOM_RIGHT"); script.setAttribute("runllm-keyboard-shortcut", "Mod+j"); script.setAttribute( "runllm-slack-community-url", "" ); document.head.appendChild(script); }); ``` -------------------------------------------------------------------------------- /dspy/predict/__init__.py: 
-------------------------------------------------------------------------------- ```python from dspy.predict.aggregation import majority from dspy.predict.best_of_n import BestOfN from dspy.predict.chain_of_thought import ChainOfThought from dspy.predict.code_act import CodeAct from dspy.predict.knn import KNN from dspy.predict.multi_chain_comparison import MultiChainComparison from dspy.predict.parallel import Parallel from dspy.predict.predict import Predict from dspy.predict.program_of_thought import ProgramOfThought from dspy.predict.react import ReAct, Tool from dspy.predict.refine import Refine __all__ = [ "majority", "BestOfN", "ChainOfThought", "CodeAct", "KNN", "MultiChainComparison", "Predict", "ProgramOfThought", "ReAct", "Refine", "Tool", "Parallel", ] ``` -------------------------------------------------------------------------------- /docs/docs/api/adapters/Adapter.md: -------------------------------------------------------------------------------- ```markdown # dspy.Adapter <!-- START_API_REF --> ::: dspy.Adapter handler: python options: members: - __call__ - acall - format - format_assistant_message_content - format_conversation_history - format_demos - format_field_description - format_field_structure - format_task_description - format_user_message_content - parse show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/primitives/Tool.md: -------------------------------------------------------------------------------- ```markdown # dspy.Tool <!-- START_API_REF --> ::: dspy.Tool handler: python options: members: - __call__ - acall - description - extract_custom_type_from_annotation - format - format_as_litellm_function_call - from_langchain - from_mcp_tool - is_streamable - parse_lm_response - 
parse_stream_chunk - serialize_model show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/adapters/TwoStepAdapter.md: -------------------------------------------------------------------------------- ```markdown # dspy.TwoStepAdapter <!-- START_API_REF --> ::: dspy.TwoStepAdapter handler: python options: members: - __call__ - acall - format - format_assistant_message_content - format_conversation_history - format_demos - format_field_description - format_field_structure - format_task_description - format_user_message_content - parse show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /tests/predict/test_chain_of_thought.py: -------------------------------------------------------------------------------- ```python import pytest import dspy from dspy import ChainOfThought from dspy.utils import DummyLM def test_initialization_with_string_signature(): lm = DummyLM([{"reasoning": "find the number after 1", "answer": "2"}]) dspy.settings.configure(lm=lm) predict = ChainOfThought("question -> answer") assert list(predict.predict.signature.output_fields.keys()) == [ "reasoning", "answer", ] assert predict(question="What is 1+1?").answer == "2" @pytest.mark.asyncio async def test_async_chain_of_thought(): lm = DummyLM([{"reasoning": "find the number after 1", "answer": "2"}]) with dspy.context(lm=lm): program = ChainOfThought("question -> answer") result = await program.acall(question="What is 1+1?") assert result.answer == "2" ``` 
-------------------------------------------------------------------------------- /tests/utils/resources/mcp_server.py: -------------------------------------------------------------------------------- ```python from mcp.server.fastmcp import FastMCP from pydantic import BaseModel mcp = FastMCP("test") class Profile(BaseModel): name: str age: int class Account(BaseModel): profile: Profile account_id: str @mcp.tool() def add(a: int, b: int) -> int: """Add two numbers""" return a + b @mcp.tool() def hello(names: list[str]) -> str: """Greet people""" return [f"Hello, {name}!" for name in names] @mcp.tool() def wrong_tool(): """This tool raises an error""" raise ValueError("error!") @mcp.tool() def get_account_name(account: Account): """This extracts the name from account""" return account.profile.name @mcp.tool() def current_datetime() -> str: """Get the current datetime""" return "2025-07-23T09:10:10.0+00:00" if __name__ == "__main__": mcp.run() ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/Module.md: -------------------------------------------------------------------------------- ```markdown # dspy.Module <!-- START_API_REF --> ::: dspy.Module handler: python options: members: - __call__ - acall - batch - deepcopy - dump_state - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/Refine.md: -------------------------------------------------------------------------------- ```markdown # dspy.Refine <!-- START_API_REF --> ::: dspy.Refine handler: python options: 
members: - __call__ - acall - batch - deepcopy - dump_state - forward - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/BestOfN.md: -------------------------------------------------------------------------------- ```markdown # dspy.BestOfN <!-- START_API_REF --> ::: dspy.BestOfN handler: python options: members: - __call__ - acall - batch - deepcopy - dump_state - forward - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/adapters/ChatAdapter.md: -------------------------------------------------------------------------------- ```markdown # dspy.ChatAdapter <!-- START_API_REF --> ::: dspy.ChatAdapter handler: python options: members: - __call__ - acall - format - format_assistant_message_content - format_conversation_history - format_demos - format_field_description - format_field_structure - format_field_with_value - format_finetune_data - format_task_description - format_user_message_content - parse - user_message_output_requirements show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false 
separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/adapters/JSONAdapter.md: -------------------------------------------------------------------------------- ```markdown # dspy.JSONAdapter <!-- START_API_REF --> ::: dspy.JSONAdapter handler: python options: members: - __call__ - acall - format - format_assistant_message_content - format_conversation_history - format_demos - format_field_description - format_field_structure - format_field_with_value - format_finetune_data - format_task_description - format_user_message_content - parse - user_message_output_requirements show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/ProgramOfThought.md: -------------------------------------------------------------------------------- ```markdown # dspy.ProgramOfThought <!-- START_API_REF --> ::: dspy.ProgramOfThought handler: python options: members: - __call__ - acall - batch - deepcopy - dump_state - forward - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/evaluation/SemanticF1.md: -------------------------------------------------------------------------------- ```markdown # dspy.evaluate.SemanticF1 <!-- START_API_REF --> ::: dspy.evaluate.SemanticF1 
handler: python options: members: - __call__ - acall - batch - deepcopy - dump_state - forward - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- ```yaml name: Feature Request description: Suggest a new feature or improvement title: "[Feature] " labels: enhancement body: - type: markdown attributes: value: | ## 🚀 Feature Request Please fill out the following details. - type: textarea id: description attributes: label: "What feature would you like to see?" description: "Describe the feature clearly." validations: required: true - type: checkboxes id: contribute attributes: label: "Would you like to contribute?" options: - label: Yes, I'd like to help implement this. - label: No, I just want to request it. - type: textarea id: additional-info attributes: label: "Additional Context" description: "Any links, references, or extra details?" placeholder: "Example: This feature exists in XYZ tool." 
``` -------------------------------------------------------------------------------- /docs/docs/api/modules/MultiChainComparison.md: -------------------------------------------------------------------------------- ```markdown # dspy.MultiChainComparison <!-- START_API_REF --> ::: dspy.MultiChainComparison handler: python options: members: - __call__ - acall - batch - deepcopy - dump_state - forward - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_nesting_1/inputs/input2.json: -------------------------------------------------------------------------------- ```json { "assertions": [ "The output should have a top-level field named 'resultLevel1'.", "Within 'resultLevel1', there should be a nested field named 'resultLevel2'.", "Within 'resultLevel2', there should be a nested field named 'resultLevel3'.", "Within 'resultLevel3', there should be a nested field named 'resultLevel4'.", "Within 'resultLevel4', there should be a nested field named 'resultLevel5'.", "Within 'resultLevel5', there should be a field named 'outputField1' which must be of type boolean.", "Within 'resultLevel5', there should be a field named 'outputField2' which must be an array of strings." 
], "input": { "level1": { "level2": { "level3": { "level4": { "level5": { "field1": "test string", "field2": 123.45 } } } } } } } ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/ChainOfThought.md: -------------------------------------------------------------------------------- ```markdown # dspy.ChainOfThought <!-- START_API_REF --> ::: dspy.ChainOfThought handler: python options: members: - __call__ - acall - aforward - batch - deepcopy - dump_state - forward - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /docs/docs/api/evaluation/CompleteAndGrounded.md: -------------------------------------------------------------------------------- ```markdown # dspy.evaluate.CompleteAndGrounded <!-- START_API_REF --> ::: dspy.evaluate.CompleteAndGrounded handler: python options: members: - __call__ - acall - batch - deepcopy - dump_state - forward - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /dspy/predict/avatar/signatures.py: -------------------------------------------------------------------------------- ```python import dspy from dspy.predict.avatar.models import 
Action class Actor(dspy.Signature): """You will be given `Tools` which will be a list of tools to use to accomplish the `Goal`. Given the user query, your task is to decide which tool to use and what input values to provide. You will output action needed to accomplish the `Goal`. `Action` should have a tool to use and the input query to pass to the tool. Note: You can opt to use no tools and provide the final answer directly. You can also one tool multiple times with different input queries if applicable.""" goal: str = dspy.InputField( prefix="Goal:", desc="Task to be accomplished.", ) tools: list[str] = dspy.InputField( prefix="Tools:", desc="list of tools to use", ) action_1: Action = dspy.OutputField( prefix="Action 1:", desc="1st action to take.", ) ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/ReAct.md: -------------------------------------------------------------------------------- ```markdown # dspy.ReAct <!-- START_API_REF --> ::: dspy.ReAct handler: python options: members: - __call__ - acall - aforward - batch - deepcopy - dump_state - forward - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm - truncate_trajectory show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_nesting_1/inputs/input1.json: -------------------------------------------------------------------------------- ```json { "assertions": [ "The top-level output should contain the key 'resultLevel1'.", "'resultLevel1' should contain the key 'resultLevel2'.", "'resultLevel2' should contain the key 'resultLevel3'.", 
"'resultLevel3' should contain the key 'resultLevel4'.", "'resultLevel4' should contain the key 'resultLevel5'.", "'resultLevel5' should contain the key 'outputField1' which should be of type boolean.", "'resultLevel5' should contain the key 'outputField2' which should be an array of strings.", "'outputField1' should indicate success or failure with a boolean value.", "'outputField2' should contain messages represented as strings." ], "input": { "level1": { "level2": { "level3": { "level4": { "level5": { "field1": "test_string", "field2": 42 } } } } } } } ``` -------------------------------------------------------------------------------- /dspy/teleprompt/teleprompt.py: -------------------------------------------------------------------------------- ```python from typing import Any from dspy.primitives import Example, Module class Teleprompter: def __init__(self): pass def compile(self, student: Module, *, trainset: list[Example], teacher: Module | None = None, valset: list[Example] | None = None, **kwargs) -> Module: """ Optimize the student program. Args: student: The student program to optimize. trainset: The training set to use for optimization. teacher: The teacher program to use for optimization. valset: The validation set to use for optimization. Returns: The optimized student program. """ raise NotImplementedError def get_params(self) -> dict[str, Any]: """ Get the parameters of the teleprompter. Returns: The parameters of the teleprompter. 
""" return self.__dict__ ``` -------------------------------------------------------------------------------- /tests/reliability/input_formats/generated/test_markdown_1/inputs/input2.json: -------------------------------------------------------------------------------- ```json { "assertions": [ "Each entry in the TOC should be a markdown link pointing to the corresponding section in the document.", "The hierarchy of the TOC should match the levels of headings in the input markdown content (e.g., H1 headings as top-level, H2 headings nested under H1, etc.).", "The TOC should include all headings from the input markdown content, in the order they appear.", "The TOC should not include any non-heading content from the input markdown document." ], "input": { "markdown_content": "# Introduction\n\nThis is the introduction section.\n\n## Overview\n\nAn overview of the document.\n\n### Details\n\nMore detailed information.\n\n#### Subdetails\n\nEven more detailed information.\n\n## Another Section\n\nContent of another section.\n\n### Subsection\n\nDetails of the subsection.\n\n```python\ndef example_function():\n print(\"Hello, World!\")\n```\n\n# Conclusion\n\nFinal thoughts." 
} } ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/Predict.md: -------------------------------------------------------------------------------- ```markdown # dspy.Predict <!-- START_API_REF --> ::: dspy.Predict handler: python options: members: - __call__ - acall - aforward - batch - deepcopy - dump_state - forward - get_config - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset - reset_copy - save - set_lm - update_config show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true ::: <!-- END_API_REF --> ``` -------------------------------------------------------------------------------- /tests/evaluate/test_metrics.py: -------------------------------------------------------------------------------- ```python # FILEPATH: /Users/ahle/repos/dspy/tests/evaluate/test_metrics.py import dspy from dspy.evaluate.metrics import answer_exact_match from dspy.predict import Predict def test_answer_exact_match_string(): example = dspy.Example( question="What is 1+1?", answer="2", ).with_inputs("question") pred = Predict("question -> answer") pred.answer = "2" assert answer_exact_match(example, pred) def test_answer_exact_match_list(): example = dspy.Example( question="What is 1+1?", answer=["2", "two"], ).with_inputs("question") pred = Predict("question -> answer") pred.answer = "2" assert answer_exact_match(example, pred) def test_answer_exact_match_no_match(): example = dspy.Example( question="What is 1+1?", answer="2", ).with_inputs("question") pred = Predict("question -> answer") pred.answer = "3" assert not answer_exact_match(example, pred) ``` -------------------------------------------------------------------------------- /dspy/teleprompt/vanilla.py: 
class LabeledFewShot(Teleprompter):
    """Teleprompter that hands each predictor up to ``k`` labeled training examples as demos."""

    def __init__(self, k=16):
        # Upper bound on the number of demos assigned to every predictor.
        self.k = k

    def compile(self, student, *, trainset, sample=True):
        """Return a reset copy of ``student`` whose predictors carry labeled demos.

        Args:
            student: Program to copy with ``reset_copy()``; only the copy is modified.
            trainset: Sequence of examples the demos are taken from.
            sample: When true, demos are drawn with a ``random.Random(0)`` generator;
                otherwise the leading examples of ``trainset`` are used.

        Returns:
            The copied student. With an empty ``trainset`` it is returned without
            any demos being assigned.
        """
        self.student = student.reset_copy()
        self.trainset = trainset

        if not len(self.trainset):
            return self.student

        # Never request more demos than there are training examples.
        demo_count = min(self.k, len(self.trainset))
        generator = random.Random(0)

        for predictor in self.student.predictors():
            predictor.demos = generator.sample(self.trainset, demo_count) if sample else self.trainset[:demo_count]

        return self.student


# NOTE: I believe templatev2 keeps rdemos as long as they have the last field.
# This may change later, especially with the introduction of required vs optional fields.
# NOTE: Since we're relying on downstream code to handle the demos, this sampling may be sub-sampled.
validations: required: true ``` -------------------------------------------------------------------------------- /docs/docs/learn/index.md: -------------------------------------------------------------------------------- ```markdown --- sidebar_position: 1 --- # Learning DSPy: An Overview DSPy exposes a very small API that you can learn quickly. However, building a new AI system is a more open-ended journey of iterative development, in which you compose the tools and design patterns of DSPy to optimize for _your_ objectives. The three stages of building AI systems in DSPy are: 1) **DSPy Programming.** This is about defining your task, its constraints, exploring a few examples, and using that to inform your initial pipeline design. 2) **DSPy Evaluation.** Once your system starts working, this is the stage where you collect an initial development set, define your DSPy metric, and use these to iterate on your system more systematically. 3) **DSPy Optimization.** Once you have a way to evaluate your system, you use DSPy optimizers to tune the prompts or weights in your program. We suggest learning and applying DSPy in this order. For example, it's unproductive to launch optimization runs using a poorly designed program or a bad metric. ``` -------------------------------------------------------------------------------- /docs/docs/tutorials/rl_ai_program/index.md: -------------------------------------------------------------------------------- ```markdown # Experimental RL Optimization for DSPy This section explores cutting-edge reinforcement learning (RL) approaches for optimizing DSPy programs. These experimental techniques represent the frontier of AI program optimization, combining the power of RL with DSPy's modular programming paradigm to achieve even better performance on complex tasks. 
## Advanced RL Optimization Techniques ### [RL for Privacy-Conscious Delegation](../rl_papillon/index.ipynb) Explore how reinforcement learning can optimize privacy-conscious AI systems. This tutorial demonstrates how RL agents can learn to balance task performance with privacy constraints, making intelligent decisions about when and how to delegate sensitive operations. ### [RL for Multi-Hop Research](../rl_multihop/index.ipynb) Learn to apply reinforcement learning to multi-hop reasoning tasks. This advanced tutorial shows how RL can optimize the search strategy in complex information retrieval scenarios, learning to navigate through multiple information sources more effectively. ``` -------------------------------------------------------------------------------- /tests/reliability/generate/__main__.py: -------------------------------------------------------------------------------- ```python import argparse from tests.reliability.generate import generate_test_cases if __name__ == "__main__": parser = argparse.ArgumentParser( description="Generate test cases by specifying configuration and input instructions." ) parser.add_argument( "-d", "--dst_path", type=str, required=True, help="Destination path where generated test cases will be saved." ) parser.add_argument( "-n", "--num_inputs", type=int, default=1, help="Number of input cases to generate (default: 1)." ) parser.add_argument( "-p", "--program_instructions", type=str, help="Additional instructions for the generated test program." ) parser.add_argument( "-i", "--input_instructions", type=str, help="Additional instructions for generating test inputs." 
class AdapterParseError(Exception):
    """Exception raised when adapter cannot parse the LM response.

    The exception message is assembled from the adapter name, the raw LM
    response, the output fields declared by the signature and, when available,
    the fields that were actually parsed.

    Attributes:
        adapter_name: Name of the adapter that failed.
        signature: Signature whose output fields were expected.
        lm_response: Raw LM response that could not be parsed.
        parsed_result: Partially parsed fields, or ``None`` when nothing was parsed.
    """

    def __init__(
        self,
        adapter_name: str,
        signature: Signature,
        lm_response: str,
        message: str | None = None,
        parsed_result: dict | None = None,
    ):
        """Build the error and its human-readable message.

        Args:
            adapter_name: Name of the adapter that failed.
            signature: Signature providing ``output_fields``; its keys are listed as the expected fields.
            lm_response: Raw LM response text.
            message: Optional text placed in front of the generated message.
            parsed_result: Mapping of the fields that were parsed; only its keys are reported.
        """
        self.adapter_name = adapter_name
        self.signature = signature
        self.lm_response = lm_response
        self.parsed_result = parsed_result

        # A caller-supplied message is separated from the generated part by a blank line.
        message = f"{message}\n\n" if message else ""
        message = (
            f"{message}"
            f"Adapter {adapter_name} failed to parse the LM response. \n\n"
            f"LM Response: {lm_response} \n\n"
            f"Expected to find output fields in the LM response: [{', '.join(signature.output_fields.keys())}] \n\n"
        )

        if parsed_result is not None:
            # parsed_result must be mapping-like: only its keys are shown, not its values.
            message += f"Actual output fields parsed from the LM response: [{', '.join(parsed_result.keys())}] \n\n"

        super().__init__(message)
The TOC will be presented in markdown format, with each entry linked to the corresponding section in the document.", "properties": { "markdown_content": { "desc": "The content of the markdown document from which the table of contents will be generated.", "description": "The content of the markdown document from which the table of contents will be generated.", "prefix": "Markdown Content:", "type": "string" }, "table_of_contents": { "desc": "The content of the markdown document from which the table of contents will be generated.", "description": "The content of the markdown document from which the table of contents will be generated.", "prefix": "Table Of Contents:", "type": "string" } }, "required": ["markdown_content", "table_of_contents"], "type": "object" } ``` -------------------------------------------------------------------------------- /.github/.tmp/.generated-actions/run-pypi-publish-in-docker-container/action.yml: -------------------------------------------------------------------------------- ```yaml {"name": "🏃", "description": "Run Docker container to upload Python distribution packages to PyPI", "inputs": {"user": {"description": "PyPI user", "required": false}, "password": {"description": "Password for your PyPI user or an access token", "required": false}, "repository-url": {"description": "The repository URL to use", "required": false}, "packages-dir": {"description": "The target directory for distribution", "required": false}, "verify-metadata": {"description": "Check metadata before uploading", "required": false}, "skip-existing": {"description": "Do not fail if a Python package distribution exists in the target package index", "required": false}, "verbose": {"description": "Show verbose output.", "required": false}, "print-hash": {"description": "Show hash values of files to be uploaded", "required": false}, "attestations": {"description": "[EXPERIMENTAL] Enable experimental support for PEP 740 attestations. 
Only works with PyPI and TestPyPI via Trusted Publishing.", "required": false}}, "runs": {"using": "docker", "image": "docker://ghcr.io/pypa/gh-action-pypi-publish:release-v1"}} ``` -------------------------------------------------------------------------------- /dspy/__init__.py: -------------------------------------------------------------------------------- ```python from dspy.predict import * from dspy.primitives import * from dspy.retrievers import * from dspy.signatures import * from dspy.teleprompt import * from dspy.evaluate import Evaluate # isort: skip from dspy.clients import * # isort: skip from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, XMLAdapter, TwoStepAdapter, Image, Audio, History, Type, Tool, ToolCalls, Code # isort: skip from dspy.utils.logging_utils import configure_dspy_loggers, disable_logging, enable_logging from dspy.utils.asyncify import asyncify from dspy.utils.syncify import syncify from dspy.utils.saving import load from dspy.streaming.streamify import streamify from dspy.utils.usage_tracker import track_usage from dspy.dsp.utils.settings import settings from dspy.dsp.colbertv2 import ColBERTv2 from dspy.clients import DSPY_CACHE from dspy.__metadata__ import __name__, __version__, __description__, __url__, __author__, __author_email__ configure_dspy_loggers(__name__) # Singleton definitions and aliasing configure = settings.configure context = settings.context BootstrapRS = BootstrapFewShotWithRandomSearch cache = DSPY_CACHE ``` -------------------------------------------------------------------------------- /dspy/utils/__init__.py: -------------------------------------------------------------------------------- ```python import os import requests from dspy.streaming.messages import StatusMessage, StatusMessageProvider from dspy.utils import exceptions from dspy.utils.annotation import experimental from dspy.utils.callback import BaseCallback, with_callbacks from dspy.utils.dummies import DummyLM, DummyVectorizer, dummy_rm 
def download(url):
    """Download ``url`` into the current working directory unless it is already there.

    The local file name is the last path component of ``url``. The download is
    skipped only when a local file with that name exists and its size equals the
    ``Content-Length`` reported by a HEAD request.

    Args:
        url: Address of the file to fetch.

    Raises:
        requests.HTTPError: If the GET request returns an error status. The check
            happens before the local file is opened, so an existing file is not
            truncated by a failed download.
    """
    filename = os.path.basename(url)
    remote_size = int(requests.head(url, allow_redirects=True).headers.get("Content-Length", 0))

    file_exists = os.path.exists(filename)
    local_size = os.path.getsize(filename) if file_exists else 0

    # A missing file must always be fetched: servers that omit Content-Length
    # report a size of 0, which would otherwise match the "no local file" size.
    if file_exists and local_size == remote_size:
        return

    print(f"Downloading '{filename}'...")
    with requests.get(url, stream=True) as r:
        # Do not store an HTTP error page as if it were the requested file.
        r.raise_for_status()
        with open(filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
def test_nav_files_exist():
    """Check that every markdown file mentioned from ``nav:`` onwards in mkdocs.yml exists on disk."""
    repo_docs = os.path.join(os.path.dirname(__file__), "..", "..", "docs")
    docs_dir = os.path.join(repo_docs, "docs")
    yaml_path = os.path.join(repo_docs, "mkdocs.yml")

    with open(yaml_path) as handle:
        config_text = handle.read()

    # Only the text from the "nav:" key onwards is scanned.
    nav_text = config_text[config_text.find("nav:"):]

    md_files = []
    for raw_line in nav_text.split("\n"):
        if ".md" not in raw_line:
            continue
        # Keep what follows the last colon, then drop list markers and quotes.
        candidate = raw_line.strip().split(":")[-1].strip()
        candidate = candidate.lstrip("- ").strip("'").strip('"')
        if candidate.endswith(".md"):
            md_files.append(candidate)

    missing = [name for name in md_files if not os.path.exists(os.path.join(docs_dir, name))]

    print("\nChecking files in:", docs_dir)
    print("Found MD files:", md_files)
    print("Missing files:", missing)

    assert not missing, f"Missing files: {missing}"
containing a string and a number.", "The 'processedEnumField' should be one of the allowed enum values: 'option1', 'option2', or 'option3'.", "The 'processedDatetimeField' should be a date-time", "The 'processedLiteralField' should be exactly 'literalValue'.", "The 'processedObjectField' should contain 'subField1' (string), 'subField2' (number), and an additional boolean field 'additionalField'.", "The 'processedNestedObjectField' should contain 'tupleField' as a tuple with a string and float, 'enumField' (one of the allowed enum values), 'datetimeField' (string formatted as date-time), 'literalField' (exactly 'literalValue'), and an additional boolean field 'additionalField'." ], "input": { "datetimeField": "2023-10-12T07:20:50.52Z", "enumField": "option1", "literalField": "literalValue", "nestedObjectField": { "datetimeField": "2023-10-12T07:20:50.52Z", "enumField": "option2", "literalField": "literalValue", "tupleField": ["nestedString", 789] }, "objectField": { "subField1": "example", "subField2": 456 }, "tupleField": ["string1", 123] } } ``` -------------------------------------------------------------------------------- /.github/workflows/precommits_check.yml: -------------------------------------------------------------------------------- ```yaml name: Pre-commit checks on: workflow_dispatch: jobs: pre-commit-checks: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.10" cache: "pip" - name: Check Pull Request Title uses: Slashgear/action-check-pr-title@main with: regexp: '(break|build|ci|docs|feat|fix|perf|refactor|style|test|ops|hotfix|release|maint|init|enh|revert)\([a-z,A-Z,0-9,\-,\_,\/,:]+\)(:)\s{1}([\w\s]+)' # Regex the title should match. 
def generate_test_cases(
    dst_path: str,
    num_inputs: int = 1,
    program_instructions: Optional[str] = None,
    input_instructions: Optional[str] = None,
) -> list[GeneratedTestCase]:
    """Generate test inputs (and a test program, if none exists yet) under ``dst_path``.

    Args:
        dst_path: Directory for the generated artifacts; created when missing.
        num_inputs: Number of test inputs to generate.
        program_instructions: Extra instructions forwarded to program generation.
        input_instructions: Extra instructions forwarded to input generation.

    Returns:
        The test cases loaded back from ``dst_path`` after generation.
    """
    os.makedirs(dst_path, exist_ok=True)

    program_exists = _directory_contains_program(dst_path)
    if not program_exists:
        print("Generating a new test program and test inputs")
        generate_test_program(
            dst_path=dst_path,
            additional_instructions=program_instructions,
        )
    else:
        # An existing program is reused; only new inputs are generated for it.
        print(f"Found an existing test program at path {dst_path}. Generating new test inputs for this program.")

    inputs_dir = os.path.join(dst_path, "inputs")
    program_file = os.path.join(dst_path, "program.py")
    generate_test_inputs(
        dst_path=inputs_dir,
        program_path=program_file,
        num_inputs=num_inputs,
        additional_instructions=input_instructions,
    )

    return load_generated_cases(dir_path=dst_path)
lm = DummyLM([{"rationale": "my rationale", "answer": "blue"}]) dspy.settings.configure(lm=lm) final_pred = compare_answers(completions, question=question) assert final_pred.rationale == "my rationale" assert final_pred.answer == "blue" ``` -------------------------------------------------------------------------------- /tests/teleprompt/test_random_search.py: -------------------------------------------------------------------------------- ```python import dspy from dspy import Example from dspy.predict import Predict from dspy.teleprompt import BootstrapFewShotWithRandomSearch from dspy.utils.dummies import DummyLM class SimpleModule(dspy.Module): def __init__(self, signature): super().__init__() self.predictor = Predict(signature) def forward(self, **kwargs): return self.predictor(**kwargs) def simple_metric(example, prediction, trace=None): return example.output == prediction.output def test_basic_workflow(): """Test to ensure the basic compile flow runs without errors.""" student = SimpleModule("input -> output") teacher = SimpleModule("input -> output") lm = DummyLM( [ "Initial thoughts", "Finish[blue]", # Expected output for both training and validation ] ) dspy.settings.configure(lm=lm) optimizer = BootstrapFewShotWithRandomSearch(metric=simple_metric, max_bootstrapped_demos=1, max_labeled_demos=1) trainset = [ Example(input="What is the color of the sky?", output="blue").with_inputs("input"), Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs("input"), ] optimizer.compile(student, teacher=teacher, trainset=trainset) ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_nesting_2/inputs/input1.json: -------------------------------------------------------------------------------- ```json { "assertions": [ "The output should contain a 'customer_summary' object with the required properties: 'customer_id', 'customer_type', and 'value'.", 
class Ensemble(Teleprompter):
    """Teleprompter that wraps several programs into one module running them together."""

    def __init__(self, *, reduce_fn=None, size=None, deterministic=False):
        """A common reduce_fn is dspy.majority."""
        assert deterministic is False, "TODO: Implement example hashing for deterministic ensemble."

        self.deterministic = deterministic
        self.size = size
        self.reduce_fn = reduce_fn

    def compile(self, programs):
        """Return a module that calls ``programs`` and optionally reduces their outputs.

        Args:
            programs: Programs forming the ensemble.

        Returns:
            An ``EnsembledProgram``. On each call it runs either all programs or,
            when ``size`` is set, a random sample of ``size`` of them, and returns
            ``reduce_fn(outputs)`` if a ``reduce_fn`` was given, else the output list.
        """
        import dspy

        # Captured at compile time so later changes to the teleprompter do not affect the module.
        sample_size = self.size
        combine = self.reduce_fn

        class EnsembledProgram(dspy.Module):
            def __init__(self):
                super().__init__()
                self.programs = programs

            def forward(self, *args, **kwargs):
                chosen = self.programs
                if sample_size:
                    chosen = random.sample(self.programs, sample_size)

                outputs = []
                for member in chosen:
                    outputs.append(member(*args, **kwargs))

                return combine(outputs) if combine else outputs

        return EnsembledProgram()
class Hasher:
    """Hasher that accepts python objects as inputs."""

    dispatch: dict = {}

    def __init__(self):
        # Running xxh64 state fed by update().
        self.m = xxhash.xxh64()

    @classmethod
    def hash_bytes(cls, value: bytes | list[bytes]) -> str:
        """Return the xxh64 hex digest of one bytes object or of a list of them, fed in order."""
        chunks = value
        if isinstance(value, bytes):
            chunks = [value]

        digest = xxhash.xxh64()
        for chunk in chunks:
            digest.update(chunk)
        return digest.hexdigest()

    @classmethod
    def hash(cls, value: Any) -> str:
        """Return the hex digest of the pickled form of ``value``."""
        pickled = dumps(value)
        return cls.hash_bytes(pickled)

    def update(self, value: Any) -> None:
        """Mix ``value`` into the running hash as a type header followed by its own digest."""
        # Both parts are computed first, so a value that cannot be pickled
        # leaves the running state untouched.
        type_header = f"=={type(value)}=="
        value_digest = self.hash(value)

        self.m.update(type_header.encode("utf8"))
        self.m.update(value_digest.encode("utf-8"))

    def hexdigest(self) -> str:
        """Return the hex digest of everything passed to update() so far."""
        return self.m.hexdigest()
def convert_langchain_tool(tool: "BaseTool") -> Tool:
    """Build a DSPy tool from a LangChain tool.

    This function converts a LangChain tool (either created with @tool decorator or by subclassing BaseTool)
    into a DSPy Tool.

    Args:
        tool: The LangChain tool to convert.

    Returns:
        A DSPy Tool object whose function awaits ``tool.ainvoke`` with the keyword
        arguments it receives. Any exception raised by that call is re-raised as a
        ``RuntimeError`` naming the tool, with the original exception as its cause.
    """

    async def func(**kwargs):
        try:
            return await tool.ainvoke(kwargs)
        except Exception as e:
            # Chain explicitly so the traceback marks the LangChain error as the direct cause.
            raise RuntimeError(f"Failed to call LangChain tool {tool.name}: {e!s}") from e

    # Get args_schema from the tool
    # https://python.langchain.com/api_reference/core/tools/langchain_core.tools.base.BaseTool.html#langchain_core.tools.base.BaseTool.args_schema
    args_schema = tool.args_schema
    args, _, arg_desc = convert_input_schema_to_tool_args(args_schema.model_json_schema())

    # The args_schema of Langchain tool is a pydantic model, so we can get the type hints from the model fields
    arg_types = {
        key: field.annotation if field.annotation is not None else Any
        for key, field in args_schema.model_fields.items()
    }

    return Tool(
        func=func,
        name=tool.name,
        desc=tool.description,
        args=args,
        arg_types=arg_types,
        arg_desc=arg_desc,
    )
Completions, Prediction def test_majority_with_prediction(): prediction = Prediction.from_completions([{"answer": "2"}, {"answer": "2"}, {"answer": "3"}]) result = majority(prediction) assert result.completions[0]["answer"] == "2" def test_majority_with_completions(): completions = Completions([{"answer": "2"}, {"answer": "2"}, {"answer": "3"}]) result = majority(completions) assert result.completions[0]["answer"] == "2" def test_majority_with_list(): completions = [{"answer": "2"}, {"answer": "2"}, {"answer": "3"}] result = majority(completions) assert result.completions[0]["answer"] == "2" def test_majority_with_normalize(): completions = [{"answer": "2"}, {"answer": " 2"}, {"answer": "3"}] result = majority(completions, normalize=normalize_text) assert result.completions[0]["answer"] == "2" def test_majority_with_field(): completions = [ {"answer": "2", "other": "1"}, {"answer": "2", "other": "1"}, {"answer": "3", "other": "2"}, ] result = majority(completions, field="other") assert result.completions[0]["other"] == "1" def test_majority_with_no_majority(): completions = [{"answer": "2"}, {"answer": "3"}, {"answer": "4"}] result = majority(completions) assert result.completions[0]["answer"] == "2" # The first completion is returned in case of a tie ``` -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- ```python import copy import os import pytest from tests.test_utils.server import litellm_test_server, read_litellm_test_server_request_logs # noqa: F401 SKIP_DEFAULT_FLAGS = ["reliability", "extra", "llm_call"] @pytest.fixture(autouse=True) def clear_settings(): """Ensures that the settings are cleared after each test.""" yield import dspy from dspy.dsp.utils.settings import DEFAULT_CONFIG dspy.settings.configure(**copy.deepcopy(DEFAULT_CONFIG), inherit_config=False) @pytest.fixture def anyio_backend(): return "asyncio" # Taken from: 
def pytest_collection_modifyitems(config, items):
    """Skip tests carrying an opt-in marker unless the matching flag was passed.

    Every marker listed in ``SKIP_DEFAULT_FLAGS`` is handled independently: tests tagged
    with a marker are skipped unless the corresponding ``--<flag>`` command-line option is
    set.

    Args:
        config: The pytest config object, used to read the command-line options.
        items: The collected test items; skip markers are added to them in place.
    """
    for flag in SKIP_DEFAULT_FLAGS:
        if config.getoption(f"--{flag}"):
            # This flag was requested, so its tests must run. Keep looping instead of
            # returning: returning here would also un-skip every flag listed after this
            # one, making the outcome depend on the order of SKIP_DEFAULT_FLAGS.
            continue

        skip_mark = pytest.mark.skip(reason=f"need --{flag} option to run")
        for item in items:
            if flag in item.keywords:
                item.add_marker(skip_mark)
# Start from the documented base signature and extend it with one field per model attribute.
program_signature = BaseSignature

# Inputs: every field of ProgramInputs becomes an input field carrying its own description.
for input_field_name, input_field in ProgramInputs.model_fields.items():
    program_signature = program_signature.append(
        name=input_field_name,
        field=dspy.InputField(description=input_field.description),
        type_=input_field.annotation,
    )

# Outputs: describe each field with its *own* description. Reading `input_field` here would
# silently reuse the description of the last input field for every output field.
for output_field_name, output_field in ProgramOutputs.model_fields.items():
    program_signature = program_signature.append(
        name=output_field_name,
        field=dspy.OutputField(description=output_field.description),
        type_=output_field.annotation,
    )

program = dspy.ChainOfThought(program_signature)
""" args, arg_types, arg_desc = convert_input_schema_to_tool_args(tool.inputSchema) # Convert the MCP tool and Session to a single async method async def func(*args, **kwargs): result = await session.call_tool(tool.name, arguments=kwargs) return _convert_mcp_tool_result(result) return Tool(func=func, name=tool.name, desc=tool.description, args=args, arg_types=arg_types, arg_desc=arg_desc) ``` -------------------------------------------------------------------------------- /tests/teleprompt/test_utils.py: -------------------------------------------------------------------------------- ```python from unittest.mock import Mock import dspy from dspy.teleprompt.utils import eval_candidate_program class DummyModule(dspy.Module): def __init__(self): super().__init__() def forward(self, **kwargs): pass def test_eval_candidate_program_full_trainset(): trainset = [1, 2, 3, 4, 5] candidate_program = DummyModule() evaluate = Mock(return_value=0) batch_size = 10 result = eval_candidate_program(batch_size, trainset, candidate_program, evaluate) evaluate.assert_called_once() _, called_kwargs = evaluate.call_args assert len(called_kwargs["devset"]) == len(trainset) assert called_kwargs["callback_metadata"] == {"metric_key": "eval_full"} assert result == 0 def test_eval_candidate_program_minibatch(): trainset = [1, 2, 3, 4, 5] candidate_program = DummyModule() evaluate = Mock(return_value=0) batch_size = 3 result = eval_candidate_program(batch_size, trainset, candidate_program, evaluate) evaluate.assert_called_once() _, called_kwargs = evaluate.call_args assert len(called_kwargs["devset"]) == batch_size assert called_kwargs["callback_metadata"] == {"metric_key": "eval_minibatch"} assert result == 0 def test_eval_candidate_program_failure(): trainset = [1, 2, 3, 4, 5] candidate_program = DummyModule() evaluate = Mock(side_effect=ValueError("Error")) batch_size = 3 result = eval_candidate_program(batch_size, trainset, candidate_program, evaluate) assert result.score == 0.0 ``` 
-------------------------------------------------------------------------------- /docs/docs/tutorials/optimize_ai_program/index.md: -------------------------------------------------------------------------------- ```markdown # Optimize AI Programs with DSPy This section focuses on DSPy's powerful optimization capabilities, demonstrating how to systematically improve your AI programs using various optimizers. These tutorials are lighter on programming concepts and instead showcase how DSPy optimizers can automatically enhance the quality and performance of your applications. ## Mathematical and Reasoning Tasks ### [Math Reasoning](../math/index.ipynb) Learn how to optimize DSPy programs for mathematical reasoning tasks. This tutorial demonstrates how optimizers can dramatically improve performance on complex math problems by finding better prompting strategies and few-shot examples. ## Model Optimization ### [Classification Finetuning](../classification_finetuning/index.ipynb) Discover how to use DSPy's finetuning optimizers to distill knowledge from large language models into smaller, more efficient models. Learn the complete workflow from prompt optimization to model finetuning for classification tasks. ## Advanced Tool Integration ### [Advanced Tool Use](../tool_use/index.ipynb) Explore how to optimize AI programs that use external tools and APIs. This tutorial shows how DSPy optimizers can learn to use tools more effectively, improving both accuracy and efficiency in tool-calling scenarios. ### [Finetuning Agents](../games/index.ipynb) Learn to optimize complex agent-based systems through finetuning. This tutorial demonstrates how to improve agent performance in interactive environments like games, where strategic thinking and adaptation are crucial. 
class ChainOfThought(Module):
    def __init__(
        self,
        signature: str | type[Signature],
        rationale_field: FieldInfo | None = None,
        rationale_field_type: type = str,
        **config: Any,
    ):
        """
        A module that reasons step by step in order to predict the output of a task.

        The given signature is extended with a leading `reasoning` output field, and the
        extended signature is handed to a `dspy.Predict` stored as `self.predict`.

        Args:
            signature (str | Type[dspy.Signature]): The signature of the module. It is passed through
                `ensure_signature` before being extended.
            rationale_field (Optional[pydantic.fields.FieldInfo]): The field that will contain the
                reasoning. When omitted, a `dspy.OutputField` with a default prefix and description
                is used.
            rationale_field_type (Type): The type of the rationale field. Only used when
                `rationale_field` is not given; otherwise `rationale_field.annotation` takes its place.
            **config: Extra keyword arguments forwarded unchanged to `dspy.Predict`.
        """
        super().__init__()
        signature = ensure_signature(signature)
        prefix = "Reasoning: Let's think step by step in order to"
        desc = "${reasoning}"
        # An explicitly supplied field wins over `rationale_field_type`: its own annotation is used.
        rationale_field_type = rationale_field.annotation if rationale_field else rationale_field_type
        rationale_field = rationale_field if rationale_field else dspy.OutputField(prefix=prefix, desc=desc)
        # Prepend so that the reasoning field comes before the signature's existing fields.
        extended_signature = signature.prepend(name="reasoning", field=rationale_field, type_=rationale_field_type)
        self.predict = dspy.Predict(extended_signature, **config)

    def forward(self, **kwargs):
        """Run the underlying predictor synchronously with the given keyword inputs."""
        return self.predict(**kwargs)

    async def aforward(self, **kwargs):
        """Run the underlying predictor asynchronously via its `acall` method."""
        return await self.predict.acall(**kwargs)
[:octicons-arrow-right-24: Set Up Observability](../tutorials/observability/index.md#tracing) - :material-ab-testing: __Reproducibility__ --- Log programs, metrics, configs, and environments for full reproducibility with DSPy's native MLflow integration. [:octicons-arrow-right-24: MLflow Integration](https://mlflow.org/docs/latest/llms/dspy/index.html) - :material-rocket-launch: __Deployment__ --- When it's time to productionize, deploy your application easily with DSPy's integration with MLflow Model Serving. [:octicons-arrow-right-24: Deployment Guide](../tutorials/deployment/index.md) - :material-arrow-up-right-bold: __Scalability__ --- DSPy is designed with thread-safety in mind and offers native asynchronous execution support for high-throughput environments. [:octicons-arrow-right-24: Async Program](../api/utils/asyncify.md) - :material-alert-rhombus: __Guardrails & Controllability__ --- DSPy's **Signatures**, **Modules**, and **Optimizers** help you control and guide LM outputs. [:octicons-arrow-right-24: Learn Signature](../learn/programming/signatures.md) </div> ``` -------------------------------------------------------------------------------- /tests/adapters/test_document.py: -------------------------------------------------------------------------------- ```python import pydantic import pytest from dspy.experimental import Document def test_document_validate_input(): # Create a `Document` instance with valid data. doc = Document(data="The Earth orbits the Sun.") assert doc.data == "The Earth orbits the Sun." with pytest.raises(pydantic.ValidationError): # Try to create a `Document` instance with invalid type. Document(data=123) def test_document_in_nested_type(): class Wrapper(pydantic.BaseModel): document: Document doc = Document(data="Hello, world!") wrapper = Wrapper(document=doc) assert wrapper.document.data == "Hello, world!" 
def test_document_with_all_fields(): doc = Document( data="Water boils at 100°C at standard pressure.", title="Physics Facts", media_type="application/pdf", context="Laboratory conditions" ) assert doc.data == "Water boils at 100°C at standard pressure." assert doc.title == "Physics Facts" assert doc.media_type == "application/pdf" assert doc.context == "Laboratory conditions" def test_document_format(): doc = Document( data="The sky is blue.", title="Color Facts", media_type="text/plain" ) formatted = doc.format() assert isinstance(formatted, list) assert len(formatted) == 1 doc_block = formatted[0] assert doc_block["type"] == "document" assert doc_block["source"]["type"] == "text" assert doc_block["source"]["media_type"] == "text/plain" assert doc_block["source"]["data"] == "The sky is blue." assert doc_block["title"] == "Color Facts" assert doc_block["citations"]["enabled"] is True ``` -------------------------------------------------------------------------------- /tests/utils/test_langchain_tool.py: -------------------------------------------------------------------------------- ```python import importlib import pytest if importlib.util.find_spec("langchain_core") is None: pytest.skip(reason="langchain_core is not installed", allow_module_level=True) from pydantic import BaseModel from dspy.utils.langchain_tool import convert_langchain_tool @pytest.mark.asyncio @pytest.mark.extra async def test_convert_custom_simple_tool(): from langchain_core.tools import tool @tool def add(a: int, b: int) -> int: """Add two numbers.""" return a + b tool = convert_langchain_tool(add) assert tool.name == "add" assert tool.desc == "Add two numbers." assert tool.args == {"a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "integer"}} assert tool.arg_types == {"a": int, "b": int} assert tool.arg_desc == {"a": "No description provided. (Required)", "b": "No description provided. 
(Required)"} assert await tool.acall(a=1, b=2) == 3 @pytest.mark.asyncio @pytest.mark.extra async def test_convert_custom_tool_with_custom_class(): from langchain_core.tools import tool class Profile(BaseModel): name: str age: int @tool def get_age(profile: Profile) -> int: """Get the age of the profile.""" return profile.age tool = convert_langchain_tool(get_age) assert tool.name == "get_age" assert tool.desc == "Get the age of the profile." assert tool.args == {"profile": {"title": "Profile", "type": "object", "properties": {"name": {"title": "Name", "type": "string"}, "age": {"title": "Age", "type": "integer"}}, "required": ["name", "age"]}} assert tool.arg_types == {"profile": Profile} assert tool.arg_desc == {"profile": "No description provided. (Required)"} assert await tool.acall(profile=Profile(name="John", age=20)) == 20 ``` -------------------------------------------------------------------------------- /tests/adapters/test_code.py: -------------------------------------------------------------------------------- ```python import inspect import pydantic import pytest import dspy def test_code_validate_input(): # Create a `dspy.Code` instance with valid code. code = dspy.Code["python"](code="print('Hello, world!')") assert code.code == "print('Hello, world!')" with pytest.raises(ValueError): # Try to create a `dspy.Code` instance with invalid type. 
dspy.Code["python"](code=123) def foo(x): return x + 1 code_source = inspect.getsource(foo) code = dspy.Code["python"](code=code_source) assert code.code == code_source def test_code_in_nested_type(): class Wrapper(pydantic.BaseModel): code: dspy.Code code = dspy.Code(code="print('Hello, world!')") wrapper = Wrapper(code=code) assert wrapper.code.code == "print('Hello, world!')" def test_code_with_language(): java_code = dspy.Code["java"](code="System.out.println('Hello, world!');") assert java_code.code == "System.out.println('Hello, world!');" assert java_code.language == "java" assert "Programming language: java" in java_code.description() cpp_code = dspy.Code["cpp"](code="std::cout << 'Hello, world!' << std::endl;") assert cpp_code.code == "std::cout << 'Hello, world!' << std::endl;" assert cpp_code.language == "cpp" assert "Programming language: cpp" in cpp_code.description() def test_code_parses_from_dirty_code(): dirty_code = "```python\nprint('Hello, world!')```" code = dspy.Code(code=dirty_code) assert code.code == "print('Hello, world!')" dirty_code_with_reasoning = """ The generated code is: ```python print('Hello, world!') ``` The reasoning is: The code is a simple print statement. 
""" code = dspy.Code(code=dirty_code_with_reasoning) assert code.code == "print('Hello, world!')" ``` -------------------------------------------------------------------------------- /dspy/utils/saving.py: -------------------------------------------------------------------------------- ```python import logging import sys from pathlib import Path from typing import TYPE_CHECKING import cloudpickle import orjson if TYPE_CHECKING: from dspy.primitives.module import Module logger = logging.getLogger(__name__) def get_dependency_versions(): import dspy cloudpickle_version = ".".join(cloudpickle.__version__.split(".")[:2]) return { "python": f"{sys.version_info.major}.{sys.version_info.minor}", "dspy": dspy.__version__, "cloudpickle": cloudpickle_version, } def load(path: str) -> "Module": """Load saved DSPy model. This method is used to load a saved DSPy model with `save_program=True`, i.e., the model is saved with cloudpickle. Args: path (str): Path to the saved model. Returns: The loaded model, a `dspy.Module` instance. """ path = Path(path) if not path.exists(): raise FileNotFoundError(f"The path '{path}' does not exist.") with open(path / "metadata.json") as f: metadata = orjson.loads(f.read()) dependency_versions = get_dependency_versions() saved_dependency_versions = metadata["dependency_versions"] for key, saved_version in saved_dependency_versions.items(): if dependency_versions[key] != saved_version: logger.warning( f"There is a mismatch of {key} version between saved model and current environment. You saved with " f"`{key}=={saved_version}`, but now you have `{key}=={dependency_versions[key]}`. This might cause " "errors or performance downgrade on the loaded model, please consider loading the model in the same " "environment as the saving environment." 
def majority(prediction_or_completions, normalize=default_normalize, field=None):
    """
    Select the completion whose value for the target field occurs most often.

    The target field is `field` if given; otherwise it is the last output field of the
    completions' signature, or the last key of the first completion when no signature is
    available. Values are compared after applying `normalize`. Completions whose
    normalized value is None are left out of the vote, unless every value normalizes to
    None. In case of a tie, earlier completions are prioritized.

    Returns a `Prediction` built from the single winning completion.
    """
    assert isinstance(prediction_or_completions, (Prediction, Completions, list))

    # Unwrap a Prediction into its completions; anything else is used as-is.
    completions = (
        prediction_or_completions.completions
        if isinstance(prediction_or_completions, Prediction)
        else prediction_or_completions
    )

    # Plain lists carry no signature.
    try:
        signature = completions.signature
    except Exception:
        signature = None

    if not field:
        field_source = signature.output_fields if signature else completions[0]
        field = list(field_source.keys())[-1]

    # A falsy `normalize` means "compare raw values".
    normalize = normalize or (lambda x: x)
    normalized_values = [normalize(entry[field]) for entry in completions]
    non_null_values = [value for value in normalized_values if value is not None]

    # Tally the votes. Dict insertion order makes `max` favor the earliest value on ties.
    tally = {}
    for value in non_null_values or normalized_values:
        tally.setdefault(value, 0)
        tally[value] += 1
    majority_value = max(tally, key=tally.get)

    # Stop at the first completion whose normalized value is the winning one.
    for winner in completions:
        if normalize(winner[field]) == majority_value:
            break

    return Prediction.from_completions([winner], signature=signature)
/docs/docs/tutorials/gepa_ai_program/index.md: -------------------------------------------------------------------------------- ```markdown # Reflective Prompt Evolution with GEPA This section introduces GEPA, a reflective prompt optimizer for DSPy. GEPA works by leveraging an LM's ability to reflect on the DSPy program's trajectory, identifying what went well, what didn't, and what can be improved. Based on this reflection, GEPA proposes new prompts, building a tree of evolved prompt candidates and accumulating improvements as the optimization progresses. Since GEPA can leverage domain-specific text feedback (as opposed to only the scalar metric), GEPA can often propose high-performing prompts in very few rollouts. GEPA was introduced in the paper [GEPA: Reflective Prompt Evolution Can Outperform Reinforcement Learning](https://arxiv.org/abs/2507.19457) and is available as `dspy.GEPA`, which internally uses the GEPA implementation provided in [gepa-ai/gepa](https://github.com/gepa-ai/gepa). ## `dspy.GEPA` Tutorials ### [GEPA for AIME (Math)](../gepa_aime/index.ipynb) This tutorial explores how GEPA can optimize a single `dspy.ChainOfThought`-based program to achieve 10% gains on AIME 2025 with GPT-4.1 Mini! ### [GEPA for Structured Information Extraction for Enterprise Tasks](../gepa_facilitysupportanalyzer/index.ipynb) This tutorial explores how GEPA leverages predictor-level feedback to improve GPT-4.1 Nano's performance on a three-part task for structured information extraction and classification in an enterprise setting. ### [GEPA for Privacy-Conscious Delegation](../gepa_papillon/index.ipynb) This tutorial explores how GEPA can improve rapidly in as few as 1 iteration, while leveraging simple feedback provided by an LLM-as-a-judge metric. The tutorial also explores how GEPA benefits from textual feedback that breaks aggregate metrics down into sub-components, allowing the reflection LM to identify which aspects of the task need improvement.
``` -------------------------------------------------------------------------------- /tests/utils/test_syncify.py: -------------------------------------------------------------------------------- ```python import asyncio import dspy def test_syncify_in_place(): class MyProgram(dspy.Module): async def aforward(self, x: int) -> int: await asyncio.sleep(0.01) return x + 1 sync_program = dspy.syncify(MyProgram()) assert sync_program(1) == 2 assert sync_program(2) == 3 def test_syncify_with_wrapper(): class MyProgram(dspy.Module): async def aforward(self, x: int) -> int: await asyncio.sleep(0.01) return x + 1 sync_program = dspy.syncify(MyProgram(), in_place=False) assert sync_program(1) == 2 assert sync_program(2) == 3 def test_syncify_works_with_optimizers(): class MyProgram(dspy.Module): def __init__(self): self.predict = dspy.Predict("question->answer") async def aforward(self, question: str): return await self.predict.acall(question=question) async_program = MyProgram() def dummy_metric(gold, pred, traces=None): return True # We only test the optimizer completes without errors, so the LM response doesn't matter. 
class MultiChainComparison(Module):
    """Feed M earlier reasoning attempts to a predictor alongside the original inputs."""

    def __init__(self, signature, M=3, temperature=0.7, **config):  # noqa: N803
        super().__init__()

        self.M = M
        base_signature = ensure_signature(signature)

        # Remember the last output field: its value is quoted as each attempt's prediction.
        *_, self.last_key = base_signature.output_fields.keys()

        # One input field per attempt, numbered from 1.
        extended = base_signature
        for attempt_no in range(1, M + 1):
            attempt_field = InputField(
                prefix=f"Student Attempt #{attempt_no}:",
                desc="${reasoning attempt}",
            )
            extended = extended.append(f"reasoning_attempt_{attempt_no}", attempt_field)

        # The corrected reasoning is prepended, ahead of the signature's existing fields.
        rationale_field = OutputField(
            prefix="Accurate Reasoning: Thank you everyone. Let's now holistically",
            desc="${corrected reasoning}",
        )
        extended = extended.prepend("rationale", rationale_field)

        self.predict = Predict(extended, temperature=temperature, **config)

    def forward(self, completions, **kwargs):
        """Summarize each completion into one attempt string and run the predictor.

        Only the first line of each completion's rationale (falling back to `reasoning`)
        and of its last-output-field value is kept. The number of completions must equal M.
        """
        attempts = []
        for completion in completions:
            raw_rationale = completion.get("rationale", completion.get("reasoning"))
            rationale = raw_rationale.strip().split("\n")[0].strip()
            raw_answer = str(completion[self.last_key])
            answer = raw_answer.strip().split("\n")[0].strip()
            attempts.append(
                f"«I'm trying to {rationale} I'm not sure but my prediction is {answer}»",
            )

        assert (
            len(attempts) == self.M
        ), f"The number of attempts ({len(attempts)}) doesn't match the expected number M ({self.M}). Please set the correct value for M when initializing MultiChainComparison."

        # Caller-supplied kwargs come last, so they win over the generated attempt fields.
        attempt_inputs = {
            f"reasoning_attempt_{attempt_no}": attempt for attempt_no, attempt in enumerate(attempts, start=1)
        }
        kwargs = {**attempt_inputs, **kwargs}
        return self.predict(**kwargs)
def extract_answer(s):
    """Return the cleaned contents of the first `\\boxed{...}` in `s`, or None if there is none.

    Nested braces inside the box are kept. If the box is never closed, everything after
    the opening brace is used. `\\text{...}` groups and `\\!` are removed, and the result
    is stripped of surrounding whitespace.
    """
    marker = "\\boxed{"
    marker_pos = s.find(marker)
    if marker_pos == -1:
        return None

    content_start = marker_pos + len(marker)

    # Walk forward to the brace that closes the box, tracking nesting depth.
    content_end = len(s)
    depth = 1
    for pos in range(content_start, len(s)):
        ch = s[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                content_end = pos
                break

    boxed = s[content_start:content_end]
    boxed = re.sub(r"\\text\{[^}]*\}", "", boxed)
    boxed = re.sub(r"\\!", "", boxed)
    return boxed.strip()
""" ``` -------------------------------------------------------------------------------- /tests/teleprompt/test_ensemble.py: -------------------------------------------------------------------------------- ```python import pytest import dspy from dspy.teleprompt import Ensemble class MockProgram(dspy.Module): def __init__(self, output): super().__init__() self.output = output def forward(self, *args, **kwargs): return self.output # Simple reduction function to test with def mock_reduce_fn(outputs): return sum(outputs) / len(outputs) def test_ensemble_without_reduction(): """Test that Ensemble correctly combines outputs without applying a reduce_fn.""" programs = [MockProgram(i) for i in range(5)] ensemble = Ensemble() ensembled_program = ensemble.compile(programs) outputs = ensembled_program() assert len(outputs) == 5, "Ensemble did not combine the correct number of outputs" def test_ensemble_with_reduction(): """Test that Ensemble correctly applies a reduce_fn to combine outputs.""" programs = [MockProgram(i) for i in range(5)] ensemble = Ensemble(reduce_fn=mock_reduce_fn) ensembled_program = ensemble.compile(programs) output = ensembled_program() expected_output = sum(range(5)) / 5 assert output == expected_output, "Ensemble did not correctly apply the reduce_fn" def test_ensemble_with_size_limitation(): """Test that specifying a size limits the number of programs used in the ensemble.""" programs = [MockProgram(i) for i in range(10)] ensemble_size = 3 ensemble = Ensemble(size=ensemble_size) ensembled_program = ensemble.compile(programs) outputs = ensembled_program() assert len(outputs) == ensemble_size, "Ensemble did not respect the specified size limitation" def test_ensemble_deterministic_behavior(): """Verify that the Ensemble class raises an assertion for deterministic behavior.""" with pytest.raises( AssertionError, match="TODO: Implement example hashing for deterministic ensemble.", ): Ensemble(deterministic=True) ``` 
-------------------------------------------------------------------------------- /tests/teleprompt/test_grpo.py: -------------------------------------------------------------------------------- ```python from dspy.teleprompt.grpo import GRPO def test_grpo_dataset_shuffler(): dataset = [1, 2, 3] grpo = GRPO( num_dspy_examples_per_grpo_step=3, exclude_demos=True, ) trainset_instances = [] for i in range(4): trainset_instances.append(grpo.select_training_sample_and_update_shuffled_trainset(dataset, i)) assert len(trainset_instances[-1]) == 3 assert set(trainset_instances[-1]) == set(dataset) def test_grpo_dataset_shuffler_with_num_ex_per_step_less_dataset(): dataset = [1, 2, 3] grpo = GRPO( num_dspy_examples_per_grpo_step=2, exclude_demos=True, ) trainset_instances = [] for i in range(15): trainset_instances.append(grpo.select_training_sample_and_update_shuffled_trainset(dataset, i)) assert len(trainset_instances[-1]) == 2 from collections import Counter counter = Counter() for instance in trainset_instances: counter.update(instance) assert len(counter) == 3 for i in counter: assert counter[i] == 10 def test_grpo_dataset_shuffler_with_num_ex_per_step_greater_dataset(): dataset = [1, 2, 3] grpo = GRPO( num_dspy_examples_per_grpo_step=5, exclude_demos=True, ) trainset_instances = [] for i in range(6): trainset_instances.append(grpo.select_training_sample_and_update_shuffled_trainset(dataset, i)) assert len(trainset_instances[-1]) == 5 from collections import Counter counter = Counter() for instance in trainset_instances: counter.update(instance) assert len(counter) == 3 for i in counter: assert counter[i] == 10 if __name__ == "__main__": test_grpo_dataset_shuffler() test_grpo_dataset_shuffler_with_num_ex_per_step_less_dataset() test_grpo_dataset_shuffler_with_num_ex_per_step_greater_dataset() print("All tests passed!") ``` -------------------------------------------------------------------------------- /dspy/predict/knn.py: 
class KNN:
    """Callable k-nearest-neighbors retriever over a fixed training set.

    Every training example is rendered to text from its input fields and embedded once at
    construction time; each query is embedded on demand and ranked against those vectors by
    dot-product score.
    """

    def __init__(self, k: int, trainset: list[Example], vectorizer: Embedder):
        """
        A k-nearest neighbors retriever that finds similar examples from a training set.

        Args:
            k: Number of nearest neighbors to retrieve
            trainset: List of training examples to search through
            vectorizer: The `Embedder` to use for vectorization

        Example:
            ```python
            import dspy
            from sentence_transformers import SentenceTransformer

            # Create a training dataset with examples
            trainset = [
                dspy.Example(input="hello", output="world"),
                # ... more examples ...
            ]

            # Initialize KNN with a sentence transformer model
            knn = KNN(
                k=3,
                trainset=trainset,
                vectorizer=dspy.Embedder(SentenceTransformer("all-MiniLM-L6-v2").encode)
            )

            # Find similar examples
            similar_examples = knn(input="hello")
            ```
        """
        self.k = k
        self.trainset = trainset
        self.embedding = vectorizer
        # Only the input fields of each example take part in the similarity search.
        trainset_casted_to_vectorize = [
            " | ".join([f"{key}: {value}" for key, value in example.items() if key in example._input_keys])
            for example in self.trainset
        ]
        self.trainset_vectors = self.embedding(trainset_casted_to_vectorize).astype(np.float32)

    def __call__(self, **kwargs) -> list:
        """Return up to `k` training examples ranked by descending dot-product score.

        Args:
            **kwargs: Field names and values of the query. They are rendered as
                "key: value" pairs joined by " | ", the same text layout used for
                the training examples.

        Returns:
            The best-scoring training examples, highest score first. The list is
            empty when `k` is 0 and holds the whole trainset when `k` exceeds its size.
        """
        query_text = " | ".join([f"{key}: {val}" for key, val in kwargs.items()])
        input_example_vector = self.embedding([query_text])
        # reshape(-1) keeps the scores one-dimensional even for a single-example trainset,
        # where squeeze() would collapse them to a 0-d array.
        scores = np.dot(self.trainset_vectors, input_example_vector.T).reshape(-1)
        # Reverse first, then truncate: slicing with `[-k:]` selects *everything* when k == 0.
        nearest_samples_idxs = scores.argsort()[::-1][: self.k]
        return [self.trainset[cur_idx] for cur_idx in nearest_samples_idxs]
def syncify(program: "Module", in_place: bool = True) -> "Module":
    """Make an async DSPy module callable from synchronous code.

    The module's `aforward` coroutine is driven to completion with `run_async` and exposed
    through a regular `forward` method. Two strategies are available:

    - `in_place=True` (recommended): attach a bound `forward` to `program` itself. This
      shadows any `forward` the module already has, so it is unsuitable when the existing
      `forward` intentionally differs from `aforward`.
    - `in_place=False`: leave `program` untouched and return a wrapper module that holds it.
      This changes the module's architecture, but it is more robust.

    Args:
        program: The async program to convert; it must implement `aforward`.
        in_place: If True, modify `program` and return it. Otherwise return a wrapper module.

    Returns:
        A module whose `forward` method can be called from a synchronous context.
    """
    if not in_place:
        from dspy.primitives.module import Module

        class SyncWrapper(Module):
            def __init__(self, program: "Module"):
                self.program = program

            def forward(self, *args, **kwargs):
                pending = self.program.aforward(*args, **kwargs)
                return run_async(pending)

        return SyncWrapper(program)

    def forward(self, *args, **kwargs):
        pending = self.aforward(*args, **kwargs)
        return run_async(pending)

    # Bind to this instance only; the module's class is left unchanged.
    program.forward = MethodType(forward, program)
    return program
The lower bound should # be `math.floor(number_of_tasks * 1.0 / capacity) * wait` because there are more than # `math.floor(number_of_tasks * 1.0 / capacity)` loops. lower_bound = math.floor(number_of_tasks * 1.0 / capacity) * wait upper_bound = math.ceil(number_of_tasks * 1.0 / capacity) * wait + 2 * wait # 2*wait for buffer assert lower_bound < total_time < upper_bound await verify_asyncify(4, 10) await verify_asyncify(8, 15) await verify_asyncify(8, 30) ``` -------------------------------------------------------------------------------- /tests/reliability/input_formats/generated/test_markdown_1/inputs/input1.json: -------------------------------------------------------------------------------- ```json { "assertions": [ "Each top-level heading (indicated by `#`) should appear as a top-level entry in the TOC.", "Each second-level heading (indicated by `##`) should be nested under the appropriate top-level heading in the TOC.", "Each third-level heading (indicated by `###`) should be nested under the appropriate second-level heading in the TOC.", "Each entry in the TOC should be linked to the corresponding section in the document, using markdown link syntax." 
], "input": { "markdown_content": "# The American Space Program\n\nThe American space program has a rich history of exploration and discovery.\n\n## Early Beginnings\n\nThe journey began in the late 1950s with the launch of the first artificial satellite.\n\n### The Space Race\n\nThe competition between the United States and the Soviet Union led to rapid advancements in space technology.\n\n## Moon Landing\n\nIn 1969, NASA successfully landed the first humans on the moon.\n\n### Apollo Missions\n\nThe Apollo missions were a series of spaceflights that landed humans on the moon and brought them back safely.\n\n## Space Shuttle Era\n\nThe development of the Space Shuttle program marked a new era in space exploration.\n\n### Reusable Spacecraft\n\nThe Space Shuttle was the first reusable spacecraft, capable of multiple missions.\n\n## International Space Station\n\nThe International Space Station (ISS) is a collaborative effort between multiple countries.\n\n### Living in Space\n\nAstronauts live and work on the ISS for extended periods, conducting scientific research.\n\n## Future Missions\n\nNASA continues to plan for future missions to Mars and beyond.\n\n### Mars Exploration\n\nExploration of Mars is a key objective for NASA's future missions.\n\n### Beyond Mars\n\nThe ultimate goal is to explore beyond Mars and into the outer reaches of the solar system.\n\n## Conclusion\n\nThe American space program has achieved many milestones and continues to push the boundaries of space exploration." 
def mock_example(question: str, answer: str) -> dspy.Example:
    """Build a DSPy example from a question/answer pair, with `question` marked as its input field."""
    example = dspy.Example(question=question, answer=answer)
    return example.with_inputs("question")
class Retrieve(Parameter):
    """Retrieval parameter that forwards queries to the retrieval model in `dspy.settings.rm`."""

    name = "Search"
    input_variable = "query"
    desc = "takes a search query and returns one or more potentially relevant passages from a corpus"

    def __init__(self, k=3, callbacks=None):
        """Store the default number of passages `k` and the callbacks list.

        A random 8-byte hex string is assigned to `self.stage` for every instance.
        """
        self.stage = random.randbytes(8).hex()
        self.k = k
        self.callbacks = callbacks or []

    def reset(self):
        """No-op: this class has nothing to reset."""

    def dump_state(self):
        """Return the serializable state, which is only `k`."""
        return {"k": getattr(self, "k")}

    def load_state(self, state):
        """Set every key/value pair of `state` as an attribute on this instance."""
        for attr_name, attr_value in state.items():
            setattr(self, attr_name, attr_value)

    @with_callbacks
    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def forward(
        self,
        query: str,
        k: int | None = None,
        **kwargs,
    ) -> list[str] | Prediction | list[Prediction]:
        """Run `query` through the configured retrieval model.

        Args:
            query: The search query.
            k: Number of passages to request; falls back to `self.k` when None.
            **kwargs: Passed through unchanged to the retrieval model.

        Returns:
            A `Prediction` whose `passages` holds the `long_text` of every retrieved item.

        Raises:
            AssertionError: If `dspy.settings.rm` is not set.
        """
        if k is None:
            k = self.k

        import dspy

        if not dspy.settings.rm:
            raise AssertionError("No RM is loaded.")

        retrieved = dspy.settings.rm(query, k=k, **kwargs)

        from collections.abc import Iterable

        if not isinstance(retrieved, Iterable):
            # A single non-iterable result: wrap it so the extraction below is uniform.
            # TODO: we should unify the type signatures of dspy.Retriever
            retrieved = [retrieved]

        texts = [psg.long_text for psg in retrieved]
        return Prediction(passages=texts)


# TODO: Consider doing Prediction.from_completions with the individual sets of passages (per query) too.
def submit(self, input_item: object) -> Future:
    """Submit an item for processing and return a Future tied to it.

    The item is not processed here: it is placed on `self.input_queue` together with a
    fresh `Future`, and whoever consumes that queue is expected to resolve the future.

    Args:
        self: Object exposing an `input_queue` with a `put` method.
        input_item: The item to enqueue. Any object is accepted; the previous annotation
            used the builtin function `any`, which is not a type.

    Returns:
        The `Future` that was enqueued alongside `input_item`.
    """
    future = Future()
    self.input_queue.put((input_item, future))
    return future
class DSPyLoggingStream:
    """
    Switchable text stream used by DSPy's event logging APIs (`eprint()`, `logger.info()`, etc.).

    `write()` and `flush()` are forwarded to whatever `sys.stderr` refers to at the moment of the
    call, so later redirections of `sys.stderr` are honored. While the stream is disabled, both
    calls are silently dropped, which silences event logs.
    """

    def __init__(self):
        self._enabled = True

    def write(self, text):
        """Forward `text` to the current `sys.stderr` unless the stream is disabled."""
        if not self._enabled:
            return
        sys.stderr.write(text)

    def flush(self):
        """Flush the current `sys.stderr` unless the stream is disabled."""
        if not self._enabled:
            return
        sys.stderr.flush()

    @property
    def enabled(self):
        """Whether writes and flushes are currently forwarded."""
        return self._enabled

    @enabled.setter
    def enabled(self, value):
        self._enabled = value
""" DSPY_LOGGING_STREAM.enabled = True def configure_dspy_loggers(root_module_name): formatter = logging.Formatter(fmt=LOGGING_LINE_FORMAT, datefmt=LOGGING_DATETIME_FORMAT) dspy_handler_name = "dspy_handler" handler = logging.StreamHandler(stream=DSPY_LOGGING_STREAM) handler.setFormatter(formatter) handler.set_name(dspy_handler_name) logger = logging.getLogger(root_module_name) logger.setLevel(logging.INFO) logger.propagate = False for existing_handler in logger.handlers[:]: if getattr(existing_handler, "name", None) == dspy_handler_name: logger.removeHandler(existing_handler) logger.addHandler(handler) ``` -------------------------------------------------------------------------------- /dspy/adapters/types/history.py: -------------------------------------------------------------------------------- ```python from typing import Any import pydantic class History(pydantic.BaseModel): """Class representing the conversation history. The conversation history is a list of messages, each message entity should have keys from the associated signature. For example, if you have the following signature: ``` class MySignature(dspy.Signature): question: str = dspy.InputField() history: dspy.History = dspy.InputField() answer: str = dspy.OutputField() ``` Then the history should be a list of dictionaries with keys "question" and "answer". 
def asyncify(program: "Module") -> Callable[..., Awaitable[Any]]:
    """
    Wraps a DSPy program so that it can be called asynchronously. This is useful for running a program in parallel
    with another task (e.g., another DSPy program).

    This implementation propagates the current thread's configuration context to the worker thread.

    Args:
        program: The DSPy program to be wrapped for asynchronous execution.

    Returns:
        An async function that accepts the same positional and keyword arguments as `program` and, when awaited,
        runs the program in a worker thread. The current thread's configuration context is inherited for each call.
    """

    async def async_program(*args, **kwargs) -> Any:
        # Capture the current overrides at call-time.
        from dspy.dsp.utils.settings import thread_local_overrides

        # Work on a copy so the snapshot is decoupled from the caller's overrides object.
        parent_overrides = thread_local_overrides.get().copy()

        def wrapped_program(*a, **kw):
            from dspy.dsp.utils.settings import thread_local_overrides

            # Merge the caller's snapshot over whatever overrides are already active where this
            # function runs; on key conflicts the caller's values win.
            original_overrides = thread_local_overrides.get()
            token = thread_local_overrides.set({**original_overrides, **parent_overrides.copy()})
            try:
                return program(*a, **kw)
            finally:
                # Always restore the previous overrides, even if the program raises.
                thread_local_overrides.reset(token)

        # Create a fresh asyncified callable each time, ensuring the latest context is used.
        # get_limiter() is evaluated on every call, so the limiter reflects the current
        # `async_max_workers` setting.
        # NOTE(review): `abandon_on_cancel=True` presumably lets a cancelled await return without
        # waiting for the worker thread to finish; confirm against the asyncer/anyio docs.
        call_async = asyncer.asyncify(wrapped_program, abandon_on_cancel=True, limiter=get_limiter())
        return await call_async(*args, **kwargs)

    return async_program
**Key Concepts:** Repository analysis, meta-programming, documentation generation ### 📧 [Email Information Extraction](../email_extraction/index.md) Build intelligent email processing systems that classify messages, extract entities, and identify action items using DSPy's structured prediction capabilities. **Key Concepts:** Information extraction, classification, text processing ### 🧠 [Memory-Enabled ReAct Agents with Mem0](../mem0_react_agent/index.md) Create conversational agents with persistent memory using DSPy ReAct and Mem0 integration for context-aware interactions across sessions. **Key Concepts:** Memory systems, conversational AI, agent persistence ### 💰 [Financial Analysis with Yahoo Finance](../yahoo_finance_react/index.md) Develop financial analysis agents that fetch real-time market data, analyze news sentiment, and provide investment insights using LangChain tool integration. **Key Concepts:** Tool integration, financial data, real-time analysis ### 🔄 [Automated Code Generation from Documentation](../sample_code_generation/index.md) Build a system that automatically fetches documentation from URLs and generates working code examples for any library using DSPy's intelligent analysis. **Key Concepts:** Web scraping, documentation parsing, automated learning, code generation ### 🎮 [Building a Creative Text-Based AI Game](../ai_text_game/index.md) Create an interactive text-based adventure game with dynamic storytelling, AI-powered NPCs, and adaptive gameplay using DSPy's modular programming approach. 
**Key Concepts:** Interactive storytelling, game state management, character progression, AI-driven narratives ``` -------------------------------------------------------------------------------- /docs/docs/learn/evaluation/overview.md: -------------------------------------------------------------------------------- ```markdown --- sidebar_position: 1 --- # Evaluation in DSPy Once you have an initial system, it's time to **collect an initial development set** so you can refine it more systematically. Even 20 input examples of your task can be useful, though 200 goes a long way. Depending on your _metric_, you either just need inputs and no labels at all, or you need inputs and the _final_ outputs of your system. (You almost never need labels for the intermediate steps in your program in DSPy.) You can probably find datasets that are adjacent to your task on, say, HuggingFace datasets or in a naturally occurring source like StackExchange. If there's data whose licenses are permissive enough, we suggest you use them. Otherwise, you can label a few examples by hand or start deploying a demo of your system and collect initial data that way. Next, you should **define your DSPy metric**. What makes outputs from your system good or bad? Invest in defining metrics and improving them incrementally over time; it's hard to consistently improve what you aren't able to define. A metric is a function that takes examples from your data and takes the output of your system, and returns a score. For simple tasks, this could be just "accuracy", e.g. for simple classification or short-form QA tasks. For most applications, your system will produce long-form outputs, so your metric will be a smaller DSPy program that checks multiple properties of the output. Getting this right on the first try is unlikely: start with something simple and iterate. Now that you have some data and a metric, run development evaluations on your pipeline designs to understand their tradeoffs. 
Look at the outputs and the metric scores. This will probably allow you to spot any major issues, and it will define a baseline for your next steps. ??? "If your metric is itself a DSPy program..." If your metric is itself a DSPy program, a powerful way to iterate is to optimize your metric itself. That's usually easy because the output of the metric is usually a simple value (e.g., a score out of 5), so the metric's metric is easy to define and optimize by collecting a few examples. ``` -------------------------------------------------------------------------------- /.github/.internal_dspyai/internals/release-checklist.md: -------------------------------------------------------------------------------- ```markdown # Release Checklist * [ ] On `main` Create a git tag with pattern X.Y.Z where X, Y, and Z follow the [semver pattern](https://semver.org/). Then push the tag to the origin git repo (github). * ```bash git tag X.Y.Z git push origin --tags ``` * This will trigger the github action to build and release the package. * [ ] Confirm the tests pass and the package has been published to pypi. * If the tests fail, you can remove the tag from your local and github repo using: ```bash git push origin --delete X.Y.Z # Delete on GitHub git tag -d X.Y.Z # Delete locally ``` * Fix the errors and then repeat the steps above to recreate the tag locally and push to GitHub to restart the process. * Note that the github action takes care of incrementing the release version on test-pypi automatically by adding a pre-release identifier in the scenario where the tests fail and you need to delete and push the same tag again. * [ ] [Create a release](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository) * [ ] Add release notes. 
You can make use of [automatically generated release notes](https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes) * If creating a new release for a major or minor version: * [ ] Create a new release branch with the last commit and name it `release/X.Y` * [ ] [Update the default branch](https://docs.github.com/en/organizations/managing-organization-settings/managing-the-default-branch-name-for-repositories-in-your-organization) on the GitHub repo to the new release branch. ### Prerequisites The automation requires a [trusted publisher](https://docs.pypi.org/trusted-publishers/) to be set up on both the pypi and test-pypi packages. If the package is migrated to a new project, please follow the [steps](https://docs.pypi.org/trusted-publishers/adding-a-publisher/) to create a trusted publisher. If you have no releases on the new project, you may have to create a [pending trusted publisher](https://docs.pypi.org/trusted-publishers/creating-a-project-through-oidc/) to allow the first automated deployment.
def test_inspect_history_basic(capsys):
    """Calls made through a predictor must be recorded in GLOBAL_HISTORY as dicts with a "messages" key."""
    # Configure a DummyLM with some predefined responses
    lm = DummyLM([{"response": "Hello"}, {"response": "How are you?"}])
    dspy.settings.configure(lm=lm)

    # Make some calls to generate history
    predictor = dspy.Predict("query: str -> response: str")
    predictor(query="Hi")
    predictor(query="What's up?")

    # Test inspecting all history.
    # The leftover `print(capsys)` debug statement was removed: it asserted nothing and only
    # wrote the fixture's repr to stdout.
    history = GLOBAL_HISTORY
    assert len(history) > 0
    assert isinstance(history, list)
    assert all(isinstance(entry, dict) for entry in history)
    assert all("messages" in entry for entry in history)
dspy.Predict("query: str -> response: str") predictor(query="Query 1") predictor(query="Query 2") # Request more entries than exist dspy.inspect_history(n=5) history = GLOBAL_HISTORY assert len(history) == 2 # Should return all available entries ``` -------------------------------------------------------------------------------- /docs/overrides/partials/tabs.html: -------------------------------------------------------------------------------- ```html <!-- Copyright (c) 2016-2023 Martin Donath <[email protected]> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
import sys
from datetime import datetime

import requests
import semver
from packaging.version import Version as PyPIVersion

# Seconds to wait for TestPyPI before giving up, so a stalled connection
# cannot hang the calling workflow indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def get_latest_version(package_name, tag_version):
    """Look up which version of ``package_name`` should be published to TestPyPI.

    Args:
        package_name: Name of the package on test.pypi.org.
        tag_version: Version string taken from the release tag.

    Returns:
        A ``(version, increment)`` tuple:

        * ``(tag_version, False)`` when TestPyPI answers 404 or has no upload
          for ``tag_version`` yet, so the tag can be used as-is.
        * ``(latest_uploaded_version, True)`` when ``tag_version`` already
          exists; the caller must bump the returned version.
        * ``(None, None)`` for any other HTTP status code.
    """
    response = requests.get(
        f"https://test.pypi.org/pypi/{package_name}/json",
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    if response.status_code == 404:
        # If no existing releases can get a 404
        return tag_version, False
    if response.status_code != 200:
        return None, None

    data = response.json()
    # Flatten the list of files for all releases: (upload_time, filename, version)
    all_uploads = [
        (release["upload_time"], release["filename"], version)
        for version, releases in data["releases"].items()
        for release in releases
    ]

    # If a release with tag_version does not exist, that is the latest version
    # and there is no need to increment it.
    if not any(version == tag_version for _, _, version in all_uploads):
        return tag_version, False

    # Otherwise pick the most recently uploaded file and report its version.
    latest_upload = max(
        all_uploads,
        key=lambda upload: datetime.fromisoformat(upload[0].rstrip("Z")),
    )
    return latest_upload[2], True


def increment_version(curr_version):
    """Return ``curr_version`` with its prerelease component bumped.

    The PEP 440 string is parsed with ``packaging``, converted to a
    ``semver.Version`` and bumped with ``bump_prerelease()``.

    Args:
        curr_version: Version string understood by ``packaging.version.Version``.

    Returns:
        The bumped version as a string.

    Raises:
        ValueError: If the release segment has more than three components,
            which cannot be mapped onto major/minor/patch.
    """
    pypi_v = PyPIVersion(curr_version)

    release = pypi_v.release
    if len(release) > 3:
        raise ValueError(
            f"Cannot map release segment of {curr_version!r} onto major.minor.patch"
        )
    # Pad short releases such as "2.5" so the prerelease can never slide into
    # the minor/patch position of the semver constructor.
    major, minor, patch = release + (0,) * (3 - len(release))

    # packaging exposes the prerelease as a tuple like ("a", 1); join it to "a1".
    prerelease = "".join(str(part) for part in pypi_v.pre) if pypi_v.pre else None

    parsed_v = semver.Version(major, minor, patch, prerelease=prerelease)
    return str(parsed_v.bump_prerelease())


if __name__ == "__main__":
    if len(sys.argv) != 3:
        raise ValueError("Usage: python test_version.py <package_name> <tag_version>")

    package_name = sys.argv[1]
    tag_v = sys.argv[2]

    latest_version, increment = get_latest_version(package_name, tag_v)
    if latest_version is None:
        # Unexpected TestPyPI response: fail loudly on stderr instead of
        # printing the literal "None" as if it were a version.
        sys.exit(f"Could not determine the latest TestPyPI version of {package_name!r}")

    new_version = increment_version(latest_version) if increment else latest_version

    # Output new version
    print(new_version)