This is page 2 of 14. Use http://codebase.md/stanfordnlp/dspy?lines=false&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── .internal_dspyai │ │ ├── internals │ │ │ ├── build-and-release.md │ │ │ └── release-checklist.md │ │ └── pyproject.toml │ ├── .tmp │ │ └── .generated-actions │ │ └── run-pypi-publish-in-docker-container │ │ └── action.yml │ ├── ISSUE_TEMPLATE │ │ ├── bug_report.yml │ │ └── feature_request.yml │ ├── PULL_REQUEST_TEMPLATE │ │ └── pull_request_template.md │ ├── workflow_scripts │ │ └── install_testpypi_pkg.sh │ └── workflows │ ├── build_and_release.yml │ ├── build_utils │ │ └── test_version.py │ ├── docs-push.yml │ ├── precommits_check.yml │ └── run_tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── docs │ ├── .gitignore │ ├── docs │ │ ├── api │ │ │ ├── adapters │ │ │ │ ├── Adapter.md │ │ │ │ ├── ChatAdapter.md │ │ │ │ ├── JSONAdapter.md │ │ │ │ └── TwoStepAdapter.md │ │ │ ├── evaluation │ │ │ │ ├── answer_exact_match.md │ │ │ │ ├── answer_passage_match.md │ │ │ │ ├── CompleteAndGrounded.md │ │ │ │ ├── Evaluate.md │ │ │ │ ├── EvaluationResult.md │ │ │ │ └── SemanticF1.md │ │ │ ├── experimental │ │ │ │ ├── Citations.md │ │ │ │ └── Document.md │ │ │ ├── index.md │ │ │ ├── models │ │ │ │ ├── Embedder.md │ │ │ │ └── LM.md │ │ │ ├── modules │ │ │ │ ├── BestOfN.md │ │ │ │ ├── ChainOfThought.md │ │ │ │ ├── CodeAct.md │ │ │ │ ├── Module.md │ │ │ │ ├── MultiChainComparison.md │ │ │ │ ├── Parallel.md │ │ │ │ ├── Predict.md │ │ │ │ ├── ProgramOfThought.md │ │ │ │ ├── ReAct.md │ │ │ │ └── Refine.md │ │ │ ├── optimizers │ │ │ │ ├── BetterTogether.md │ │ │ │ ├── BootstrapFewShot.md │ │ │ │ ├── BootstrapFewShotWithRandomSearch.md │ │ │ │ ├── BootstrapFinetune.md │ │ │ │ ├── BootstrapRS.md │ │ │ │ ├── COPRO.md │ │ │ │ ├── Ensemble.md │ │ │ │ ├── GEPA │ │ │ │ │ ├── GEPA_Advanced.md │ │ │ │ │ └── overview.md │ │ │ │ ├── InferRules.md │ │ │ │ ├── KNN.md │ │ │ │ ├── KNNFewShot.md │ │ │ │ ├── LabeledFewShot.md │ │ │ │ ├── MIPROv2.md │ │ │ │ └── SIMBA.md │ │ │ ├── primitives │ │ │ │ ├── Audio.md │ │ │ │ ├── Code.md │ │ │ │ ├── Example.md │ │ │ │ ├── History.md │ │ │ │ ├── Image.md │ │ │ │ ├── Prediction.md │ │ │ │ ├── Tool.md │ │ │ │ └── ToolCalls.md │ │ │ ├── signatures │ │ │ │ ├── InputField.md │ │ │ │ ├── OutputField.md │ │ │ │ └── Signature.md │ │ │ ├── tools │ │ │ │ ├── ColBERTv2.md │ │ │ │ ├── Embeddings.md │ │ │ │ └── PythonInterpreter.md │ │ │ └── utils │ │ │ ├── asyncify.md │ │ │ ├── configure_cache.md │ │ │ ├── disable_litellm_logging.md │ │ │ ├── disable_logging.md │ │ │ ├── enable_litellm_logging.md │ │ │ ├── enable_logging.md │ │ │ ├── inspect_history.md │ │ │ ├── load.md │ │ │ ├── StatusMessage.md │ │ │ ├── StatusMessageProvider.md │ │ │ ├── streamify.md │ │ │ └── StreamListener.md │ │ ├── cheatsheet.md │ │ ├── community │ │ │ ├── community-resources.md │ │ │ ├── how-to-contribute.md │ │ │ └── use-cases.md │ │ ├── deep-dive │ │ │ └── data-handling │ │ │ ├── built-in-datasets.md │ │ │ ├── examples.md │ │ │ ├── img │ │ │ │ └── data-loading.png │ │ │ └── loading-custom-data.md │ │ ├── faqs.md │ │ ├── index.md │ │ ├── js │ │ │ └── runllm-widget.js │ │ ├── learn │ │ │ ├── evaluation │ │ │ │ ├── data.md │ │ │ │ ├── metrics.md │ │ │ │ └── overview.md │ │ │ ├── figures │ │ │ │ ├── native_tool_call.png │ │ │ │ └── teleprompter-classes.png │ │ │ ├── index.md │ │ │ ├── optimization │ │ │ │ ├── optimizers.md │ │ │ │ └── overview.md │ │ │ └── programming │ │ │ ├── 7-assertions.md │ │ │ ├── adapters.md │ │ │ ├── language_models.md │ │ │ 
├── mcp.md │ │ │ ├── modules.md │ │ │ ├── overview.md │ │ │ ├── signatures.md │ │ │ └── tools.md │ │ ├── production │ │ │ └── index.md │ │ ├── roadmap.md │ │ ├── static │ │ │ ├── .nojekyll │ │ │ └── img │ │ │ ├── dspy_logo.png │ │ │ ├── logo.png │ │ │ ├── mlflow-tracing-rag.png │ │ │ ├── modular.png │ │ │ ├── optimize.png │ │ │ ├── undraw_docusaurus_mountain.svg │ │ │ ├── undraw_docusaurus_react.svg │ │ │ ├── undraw_docusaurus_tree.svg │ │ │ └── universal_compatibility.png │ │ ├── stylesheets │ │ │ └── extra.css │ │ └── tutorials │ │ ├── agents │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-agent.png │ │ ├── ai_text_game │ │ │ └── index.md │ │ ├── async │ │ │ └── index.md │ │ ├── audio │ │ │ └── index.ipynb │ │ ├── build_ai_program │ │ │ └── index.md │ │ ├── cache │ │ │ └── index.md │ │ ├── classification │ │ │ └── index.md │ │ ├── classification_finetuning │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-classification.png │ │ ├── conversation_history │ │ │ └── index.md │ │ ├── core_development │ │ │ └── index.md │ │ ├── custom_module │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-custom-module.png │ │ ├── customer_service_agent │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-customer-service-agent.png │ │ ├── deployment │ │ │ ├── dspy_mlflow_ui.png │ │ │ └── index.md │ │ ├── email_extraction │ │ │ ├── index.md │ │ │ └── mlflow-tracing-email-extraction.png │ │ ├── entity_extraction │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-entity-extraction.png │ │ ├── games │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-agent.png │ │ ├── gepa_ai_program │ │ │ └── index.md │ │ ├── gepa_aime │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-aime.png │ │ │ └── mlflow-tracking-gepa-aime-optimization.png │ │ ├── gepa_facilitysupportanalyzer │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-support.png │ │ │ └── mlflow-tracking-gepa-support-optimization.png │ │ ├── gepa_papillon │ │ │ ├── index.ipynb │ │ │ ├── mlflow-tracing-gepa-papilon.png │ │ │ └── mlflow-tracking-gepa-papilon-optimization.png │ │ ├── image_generation_prompting │ │ │ └── index.ipynb │ │ ├── index.md │ │ ├── llms_txt_generation │ │ │ └── index.md │ │ ├── math │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-math.png │ │ ├── mcp │ │ │ └── index.md │ │ ├── mem0_react_agent │ │ │ └── index.md │ │ ├── multihop_search │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-multi-hop.png │ │ ├── observability │ │ │ ├── index.md │ │ │ ├── mlflow_trace_ui_navigation.gif │ │ │ ├── mlflow_trace_ui.png │ │ │ └── mlflow_trace_view.png │ │ ├── optimize_ai_program │ │ │ └── index.md │ │ ├── optimizer_tracking │ │ │ ├── child_run.png │ │ │ ├── experiment.png │ │ │ ├── index.md │ │ │ └── parent_run.png │ │ ├── output_refinement │ │ │ └── best-of-n-and-refine.md │ │ ├── papillon │ │ │ └── index.md │ │ ├── program_of_thought │ │ │ └── index.ipynb │ │ ├── rag │ │ │ ├── index.ipynb │ │ │ └── mlflow-tracing-rag.png │ │ ├── real_world_examples │ │ │ └── index.md │ │ ├── rl_ai_program │ │ │ └── index.md │ │ ├── rl_multihop │ │ │ └── index.ipynb │ │ ├── rl_papillon │ │ │ └── index.ipynb │ │ ├── sample_code_generation │ │ │ └── index.md │ │ ├── saving │ │ │ └── index.md │ │ ├── streaming │ │ │ └── index.md │ │ ├── tool_use │ │ │ └── index.ipynb │ │ └── yahoo_finance_react │ │ └── index.md │ ├── mkdocs.yml │ ├── overrides │ │ ├── home.html │ │ ├── main.html │ │ └── partials │ │ └── tabs.html │ ├── Pipfile │ ├── Pipfile.lock │ ├── README.md │ ├── requirements.txt │ ├── scripts │ │ ├── generate_api_docs.py │ │ └── generate_api_summary.py │ └── vercel.json ├── dspy │ ├── 
__init__.py │ ├── __metadata__.py │ ├── adapters │ │ ├── __init__.py │ │ ├── baml_adapter.py │ │ ├── base.py │ │ ├── chat_adapter.py │ │ ├── json_adapter.py │ │ ├── two_step_adapter.py │ │ ├── types │ │ │ ├── __init__.py │ │ │ ├── audio.py │ │ │ ├── base_type.py │ │ │ ├── citation.py │ │ │ ├── code.py │ │ │ ├── document.py │ │ │ ├── history.py │ │ │ ├── image.py │ │ │ └── tool.py │ │ ├── utils.py │ │ └── xml_adapter.py │ ├── clients │ │ ├── __init__.py │ │ ├── base_lm.py │ │ ├── cache.py │ │ ├── databricks.py │ │ ├── embedding.py │ │ ├── lm_local_arbor.py │ │ ├── lm_local.py │ │ ├── lm.py │ │ ├── openai.py │ │ ├── provider.py │ │ └── utils_finetune.py │ ├── datasets │ │ ├── __init__.py │ │ ├── alfworld │ │ │ ├── __init__.py │ │ │ ├── alfworld.py │ │ │ └── base_config.yml │ │ ├── colors.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── gsm8k.py │ │ ├── hotpotqa.py │ │ └── math.py │ ├── dsp │ │ ├── __init__.py │ │ ├── colbertv2.py │ │ └── utils │ │ ├── __init__.py │ │ ├── dpr.py │ │ ├── settings.py │ │ └── utils.py │ ├── evaluate │ │ ├── __init__.py │ │ ├── auto_evaluation.py │ │ ├── evaluate.py │ │ └── metrics.py │ ├── experimental │ │ └── __init__.py │ ├── predict │ │ ├── __init__.py │ │ ├── aggregation.py │ │ ├── avatar │ │ │ ├── __init__.py │ │ │ ├── avatar.py │ │ │ ├── models.py │ │ │ └── signatures.py │ │ ├── best_of_n.py │ │ ├── chain_of_thought.py │ │ ├── code_act.py │ │ ├── knn.py │ │ ├── multi_chain_comparison.py │ │ ├── parallel.py │ │ ├── parameter.py │ │ ├── predict.py │ │ ├── program_of_thought.py │ │ ├── react.py │ │ ├── refine.py │ │ └── retry.py │ ├── primitives │ │ ├── __init__.py │ │ ├── base_module.py │ │ ├── example.py │ │ ├── module.py │ │ ├── prediction.py │ │ ├── python_interpreter.py │ │ └── runner.js │ ├── propose │ │ ├── __init__.py │ │ ├── dataset_summary_generator.py │ │ ├── grounded_proposer.py │ │ ├── propose_base.py │ │ └── utils.py │ ├── retrievers │ │ ├── __init__.py │ │ ├── databricks_rm.py │ │ ├── embeddings.py │ │ ├── retrieve.py │ │ └── weaviate_rm.py │ ├── signatures │ │ ├── __init__.py │ │ ├── field.py │ │ ├── signature.py │ │ └── utils.py │ ├── streaming │ │ ├── __init__.py │ │ ├── messages.py │ │ ├── streamify.py │ │ └── streaming_listener.py │ ├── teleprompt │ │ ├── __init__.py │ │ ├── avatar_optimizer.py │ │ ├── bettertogether.py │ │ ├── bootstrap_finetune.py │ │ ├── bootstrap_trace.py │ │ ├── bootstrap.py │ │ ├── copro_optimizer.py │ │ ├── ensemble.py │ │ ├── gepa │ │ │ ├── __init__.py │ │ │ ├── gepa_utils.py │ │ │ ├── gepa.py │ │ │ └── instruction_proposal.py │ │ ├── grpo.py │ │ ├── infer_rules.py │ │ ├── knn_fewshot.py │ │ ├── mipro_optimizer_v2.py │ │ ├── random_search.py │ │ ├── signature_opt.py │ │ ├── simba_utils.py │ │ ├── simba.py │ │ ├── teleprompt_optuna.py │ │ ├── teleprompt.py │ │ ├── utils.py │ │ └── vanilla.py │ └── utils │ ├── __init__.py │ ├── annotation.py │ ├── asyncify.py │ ├── caching.py │ ├── callback.py │ ├── dummies.py │ ├── exceptions.py │ ├── hasher.py │ ├── inspect_history.py │ ├── langchain_tool.py │ ├── logging_utils.py │ ├── mcp.py │ ├── parallelizer.py │ ├── saving.py │ ├── syncify.py │ ├── unbatchify.py │ └── usage_tracker.py ├── LICENSE ├── pyproject.toml ├── README.md ├── tests │ ├── __init__.py │ ├── adapters │ │ ├── test_adapter_utils.py │ │ ├── test_baml_adapter.py │ │ ├── test_base_type.py │ │ ├── test_chat_adapter.py │ │ ├── test_citation.py │ │ ├── test_code.py │ │ ├── test_document.py │ │ ├── test_json_adapter.py │ │ ├── test_tool.py │ │ ├── test_two_step_adapter.py │ │ └── test_xml_adapter.py │ ├── 
callback │ │ └── test_callback.py │ ├── clients │ │ ├── test_cache.py │ │ ├── test_databricks.py │ │ ├── test_embedding.py │ │ ├── test_inspect_global_history.py │ │ └── test_lm.py │ ├── conftest.py │ ├── datasets │ │ └── test_dataset.py │ ├── docs │ │ └── test_mkdocs_links.py │ ├── evaluate │ │ ├── test_evaluate.py │ │ └── test_metrics.py │ ├── examples │ │ └── test_baleen.py │ ├── metadata │ │ └── test_metadata.py │ ├── predict │ │ ├── test_aggregation.py │ │ ├── test_best_of_n.py │ │ ├── test_chain_of_thought.py │ │ ├── test_code_act.py │ │ ├── test_knn.py │ │ ├── test_multi_chain_comparison.py │ │ ├── test_parallel.py │ │ ├── test_predict.py │ │ ├── test_program_of_thought.py │ │ ├── test_react.py │ │ ├── test_refine.py │ │ └── test_retry.py │ ├── primitives │ │ ├── resources │ │ │ └── saved_program.json │ │ ├── test_base_module.py │ │ ├── test_example.py │ │ ├── test_module.py │ │ └── test_python_interpreter.py │ ├── propose │ │ └── test_grounded_proposer.py │ ├── README.md │ ├── reliability │ │ ├── __init__.py │ │ ├── complex_types │ │ │ └── generated │ │ │ ├── test_many_types_1 │ │ │ │ ├── inputs │ │ │ │ │ ├── input1.json │ │ │ │ │ └── input2.json │ │ │ │ ├── program.py │ │ │ │ └── schema.json │ │ │ ├── test_nesting_1 │ │ │ │ ├── inputs │ │ │ │ │ ├── input1.json │ │ │ │ │ └── input2.json │ │ │ │ ├── program.py │ │ │ │ └── schema.json │ │ │ └── test_nesting_2 │ │ │ ├── inputs │ │ │ │ └── input1.json │ │ │ ├── program.py │ │ │ └── schema.json │ │ ├── conftest.py │ │ ├── generate │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ └── utils.py │ │ ├── input_formats │ │ │ └── generated │ │ │ └── test_markdown_1 │ │ │ ├── inputs │ │ │ │ ├── input1.json │ │ │ │ └── input2.json │ │ │ ├── program.py │ │ │ └── schema.json │ │ ├── README.md │ │ ├── reliability_conf.yaml │ │ ├── test_generated.py │ │ ├── test_pydantic_models.py │ │ └── utils.py │ ├── retrievers │ │ └── test_embeddings.py │ ├── signatures │ │ ├── test_adapter_image.py │ │ ├── test_custom_types.py │ │ └── test_signature.py │ ├── streaming │ │ └── test_streaming.py │ ├── teleprompt │ │ ├── gepa_dummy_lm_custom_component_selector_custom_instruction_proposer.json │ │ ├── gepa_dummy_lm.json │ │ ├── test_bootstrap_finetune.py │ │ ├── test_bootstrap_trace.py │ │ ├── test_bootstrap.py │ │ ├── test_copro_optimizer.py │ │ ├── test_ensemble.py │ │ ├── test_finetune.py │ │ ├── test_gepa_instruction_proposer.py │ │ ├── test_gepa.py │ │ ├── test_grpo.py │ │ ├── test_knn_fewshot.py │ │ ├── test_random_search.py │ │ ├── test_teleprompt.py │ │ └── test_utils.py │ ├── test_utils │ │ ├── __init__.py │ │ └── server │ │ ├── __init__.py │ │ ├── litellm_server_config.yaml │ │ └── litellm_server.py │ └── utils │ ├── __init__.py │ ├── resources │ │ └── mcp_server.py │ ├── test_annotation.py │ ├── test_asyncify.py │ ├── test_exceptions.py │ ├── test_langchain_tool.py │ ├── test_mcp.py │ ├── test_parallelizer.py │ ├── test_saving.py │ ├── test_settings.py │ ├── test_syncify.py │ ├── test_unbatchify.py │ └── test_usage_tracker.py └── uv.lock ``` # Files -------------------------------------------------------------------------------- /tests/utils/test_exceptions.py: -------------------------------------------------------------------------------- ```python import dspy from dspy.utils.exceptions import AdapterParseError def test_adapter_parse_error_basic(): adapter_name = "ChatAdapter" signature = dspy.make_signature("question->answer1, answer2") lm_response = "[[ ## answer1 ## ]]\nanswer1" error = AdapterParseError(adapter_name=adapter_name, 
signature=signature, lm_response=lm_response) assert error.adapter_name == adapter_name assert error.signature == signature assert error.lm_response == lm_response error_message = str(error) assert error_message == ( "Adapter ChatAdapter failed to parse the LM response. \n\n" "LM Response: [[ ## answer1 ## ]]\nanswer1 \n\n" "Expected to find output fields in the LM response: [answer1, answer2] \n\n" ) def test_adapter_parse_error_with_message(): adapter_name = "ChatAdapter" signature = dspy.make_signature("question->answer1, answer2") lm_response = "[[ ## answer1 ## ]]\nanswer1" message = "Critical error, please fix!" error = AdapterParseError(adapter_name=adapter_name, signature=signature, lm_response=lm_response, message=message) assert error.adapter_name == adapter_name assert error.signature == signature assert error.lm_response == lm_response error_message = str(error) assert error_message == ( "Critical error, please fix!\n\n" "Adapter ChatAdapter failed to parse the LM response. \n\n" "LM Response: [[ ## answer1 ## ]]\nanswer1 \n\n" "Expected to find output fields in the LM response: [answer1, answer2] \n\n" ) def test_adapter_parse_error_with_parsed_result(): adapter_name = "ChatAdapter" signature = dspy.make_signature("question->answer1, answer2") lm_response = "[[ ## answer1 ## ]]\nanswer1" parsed_result = {"answer1": "value1"} error = AdapterParseError( adapter_name=adapter_name, signature=signature, lm_response=lm_response, parsed_result=parsed_result ) error_message = str(error) assert error_message == ( "Adapter ChatAdapter failed to parse the LM response. \n\n" "LM Response: [[ ## answer1 ## ]]\nanswer1 \n\n" "Expected to find output fields in the LM response: [answer1, answer2] \n\n" "Actual output fields parsed from the LM response: [answer1] \n\n" ) ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_nesting_1/program.py: -------------------------------------------------------------------------------- ```python ### Input models ### from pydantic import BaseModel, Field class Level5(BaseModel): field1: str = Field(..., description="A string field at the deepest level") field2: float = Field(..., description="A numerical field at the deepest level") class Level4(BaseModel): level5: Level5 class Level3(BaseModel): level4: Level4 class Level2(BaseModel): level3: Level3 class Level1(BaseModel): level2: Level2 class ProgramInputs(BaseModel): level1: Level1 ### Output models ### from typing import List from pydantic import BaseModel, Field class ResultLevel5(BaseModel): outputField1: bool = Field(..., description="A boolean field indicating success or failure") outputField2: list[str] = Field(..., description="An array of strings representing messages") class ResultLevel4(BaseModel): resultLevel5: ResultLevel5 class ResultLevel3(BaseModel): resultLevel4: ResultLevel4 class ResultLevel2(BaseModel): resultLevel3: ResultLevel3 class ResultLevel1(BaseModel): resultLevel2: ResultLevel2 class ProgramOutputs(BaseModel): resultLevel1: ResultLevel1 ### Program definition ### import dspy class BaseSignature(dspy.Signature): """ The AI program is designed to process hierarchical data structures with multiple levels of nesting. The program will take a deeply nested input structure representing a complex dataset, perform specific transformations, validations, and computations, and then produce an equally complex nested output structure. 
The program is suitable for applications that require detailed data processing, such as multi-level data aggregation, hierarchical data validation, and nested data transformation. """ program_signature = BaseSignature for input_field_name, input_field in ProgramInputs.model_fields.items(): program_signature = program_signature.append( name=input_field_name, field=dspy.InputField(description=input_field.description), type_=input_field.annotation, ) for output_field_name, output_field in ProgramOutputs.model_fields.items(): program_signature = program_signature.append( name=output_field_name, field=dspy.OutputField(description=input_field.description), type_=output_field.annotation, ) program = dspy.Predict(program_signature) ``` -------------------------------------------------------------------------------- /docs/docs/learn/optimization/overview.md: -------------------------------------------------------------------------------- ```markdown --- sidebar_position: 1 --- # Optimization in DSPy Once you have a system and a way to evaluate it, you can use DSPy optimizers to tune the prompts or weights in your program. Now it's useful to expand your data collection effort into building a training set and a held-out test set, in addition to the development set you've been using for exploration. For the training set (and its subset, validation set), you can often get substantial value out of 30 examples, but aim for at least 300 examples. Some optimizers accept a `trainset` only. Others ask for a `trainset` and a `valset`. When splitting data for most prompt optimizers, we recommend an unusual split compared to deep neural networks: 20% for training, 80% for validation. This reverse allocation emphasizes stable validation, since prompt-based optimizers often overfit to small training sets. In contrast, the [dspy.GEPA](https://dspy.ai/tutorials/gepa_ai_program/) optimizer follows the more standard ML convention: Maximize the training set size, while keeping the validation set just large enough to reflect the distribution of the downstream tasks (test set). After your first few optimization runs, you are either very happy with everything or you've made a lot of progress but you don't like something about the final program or the metric. At this point, go back to step 1 (Programming in DSPy) and revisit the major questions. Did you define your task well? Do you need to collect (or find online) more data for your problem? Do you want to update your metric? And do you want to use a more sophisticated optimizer? Do you need to consider advanced features like DSPy Assertions? Or, perhaps most importantly, do you want to add some more complexity or steps in your DSPy program itself? Do you want to use multiple optimizers in a sequence? Iterative development is key. DSPy gives you the pieces to do that incrementally: iterating on your data, your program structure, your metric, and your optimization steps. Optimizing complex LM programs is an entirely new paradigm that only exists in DSPy at the time of writing (update: there are now numerous DSPy extension frameworks, so this part is no longer true :-), so naturally the norms around what to do are still emerging. If you need help, we recently created a [Discord server](https://discord.gg/XCGy2WDCQB) for the community. 
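As a concrete illustration of the splitting advice above, here is a minimal sketch of the 20%/80% train/validation split recommended for most prompt optimizers. It is only a sketch: the `dspy.Example` fields, the `my_metric` function, and the choice of `dspy.MIPROv2` are placeholders to adapt to your own task and metric.

```python
import random

import dspy

# Placeholder data; in practice these examples come from your own data collection effort.
examples = [
    dspy.Example(question=f"question {i}", answer=f"answer {i}").with_inputs("question")
    for i in range(300)
]

# Shuffle once with a fixed seed, then keep roughly 20% for training and 80% for validation.
random.Random(0).shuffle(examples)
split = int(0.2 * len(examples))
trainset, valset = examples[:split], examples[split:]

# Optimizers that accept a valset can then use both sets, for example:
# optimized = dspy.MIPROv2(metric=my_metric).compile(program, trainset=trainset, valset=valset)
```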
``` -------------------------------------------------------------------------------- /tests/utils/test_parallelizer.py: -------------------------------------------------------------------------------- ```python import time import pytest from dspy.utils.parallelizer import ParallelExecutor def test_worker_threads_independence(): def task(item): # Each thread maintains its own state by appending to a thread-local list return item * 2 data = [1, 2, 3, 4, 5] executor = ParallelExecutor(num_threads=3) results = executor.execute(task, data) assert results == [2, 4, 6, 8, 10] def test_parallel_execution_speed(): def task(item): time.sleep(0.1) # Simulate a time-consuming task return item data = [1, 2, 3, 4, 5] executor = ParallelExecutor(num_threads=5) start_time = time.time() executor.execute(task, data) end_time = time.time() assert end_time - start_time < len(data) def test_max_errors_handling(): def task(item): if item == 3: raise ValueError("Intentional error") return item data = [1, 2, 3, 4, 5] executor = ParallelExecutor(num_threads=3, max_errors=1) with pytest.raises(Exception, match="Execution cancelled due to errors or interruption."): executor.execute(task, data) def test_max_errors_not_met(): def task(item): if item == 3: raise ValueError("Intentional error") return item data = [1, 2, 3, 4, 5] executor = ParallelExecutor(num_threads=3, max_errors=2) # Ensure that the execution completes without crashing when max_errors is not met results = executor.execute(task, data) # Verify that the results exclude the failed task assert results == [1, 2, None, 4, 5] def test_parallel_executor_tracks_failed_indices_and_exceptions(): def task(item): if item == 3: raise ValueError("test error for 3") if item == 5: raise RuntimeError("test error for 5") return item data = [1, 2, 3, 4, 5] executor = ParallelExecutor(num_threads=3, max_errors=3) results = executor.execute(task, data) assert results == [1, 2, None, 4, None] assert sorted(executor.failed_indices) == [2, 4] assert len(executor.exceptions_map) == 2 assert isinstance(executor.exceptions_map[2], ValueError) assert str(executor.exceptions_map[2]) == "test error for 3" assert isinstance(executor.exceptions_map[4], RuntimeError) assert str(executor.exceptions_map[4]) == "test error for 5" ``` -------------------------------------------------------------------------------- /dspy/datasets/gsm8k.py: -------------------------------------------------------------------------------- ```python import random import tqdm class GSM8K: def __init__(self): self.do_shuffle = False from datasets import load_dataset dataset = load_dataset("gsm8k", "main") hf_official_train = dataset["train"] hf_official_test = dataset["test"] official_train = [] official_test = [] for example in tqdm.tqdm(hf_official_train): question = example["question"] answer = example["answer"].strip().split() assert answer[-2] == "####" gold_reasoning = " ".join(answer[:-2]) answer = str(int(answer[-1].replace(",", ""))) official_train.append({"question": question, "gold_reasoning": gold_reasoning, "answer": answer}) for example in tqdm.tqdm(hf_official_test): question = example["question"] answer = example["answer"].strip().split() assert answer[-2] == "####" gold_reasoning = " ".join(answer[:-2]) answer = str(int(answer[-1].replace(",", ""))) official_test.append({"question": question, "gold_reasoning": gold_reasoning, "answer": answer}) rng = random.Random(0) rng.shuffle(official_train) rng = random.Random(0) rng.shuffle(official_test) trainset = official_train[:200] devset = 
official_train[200:500] testset = official_test[:] import dspy trainset = [dspy.Example(**x).with_inputs("question") for x in trainset] devset = [dspy.Example(**x).with_inputs("question") for x in devset] testset = [dspy.Example(**x).with_inputs("question") for x in testset] self.train = trainset self.dev = devset self.test = testset def parse_integer_answer(answer, only_first_line=True): try: if only_first_line: answer = answer.strip().split("\n")[0] # find the last token that has a number in it answer = [token for token in answer.split() if any(c.isdigit() for c in token)][-1] answer = answer.split(".")[0] answer = "".join([c for c in answer if c.isdigit()]) answer = int(answer) except (ValueError, IndexError): answer = 0 return answer def gsm8k_metric(gold, pred, trace=None): return int(parse_integer_answer(str(gold.answer))) == int(parse_integer_answer(str(pred.answer))) ``` -------------------------------------------------------------------------------- /docs/docs/tutorials/core_development/index.md: -------------------------------------------------------------------------------- ```markdown # Tools, Development, and Deployment This section covers essential DSPy features and best practices for professional AI development. Learn how to implement key functionalities like streaming, caching, deployment, and monitoring in your DSPy applications. These tutorials focus on the practical aspects of building production-ready systems. ## Integration and Tooling ### [Use MCP in DSPy](../mcp/index.md) Learn to integrate Model Context Protocol (MCP) with DSPy applications. This tutorial shows how to leverage MCP for enhanced context management and more sophisticated AI interactions. ### [Output Refinement](../output_refinement/best-of-n-and-refine.md) Master techniques for improving output quality through refinement strategies. Learn how to implement best-of-N sampling and iterative refinement to get higher-quality results from your DSPy programs. ## Data Management and Persistence ### [Saving and Loading](../saving/index.md) Understand how to persist and restore DSPy programs and their optimized states. Learn best practices for model versioning, checkpoint management, and program serialization. ### [Cache](../cache/index.md) Implement efficient caching strategies to improve performance and reduce API costs. Learn how to configure and use DSPy's caching mechanisms effectively in different scenarios. ## Production Deployment ### [Deployment](../deployment/index.md) Learn to deploy DSPy applications in production environments. This tutorial covers multiple deployment strategies such as FastAPI and MLflow. ### [Streaming](../streaming/index.md) Implement real-time streaming capabilities in your DSPy applications. Learn how to handle streaming responses for better user experience in interactive applications. ### [Async](../async/index.md) Build asynchronous DSPy applications for improved performance and scalability. Learn async/await patterns and concurrent execution strategies for high-throughput systems. ## Monitoring and Optimization ### [Debugging & Observability](../observability/index.md) Master debugging and monitoring techniques for DSPy applications. Learn to use comprehensive logging, tracing, and error handling for production systems. ### [Tracking DSPy Optimizers](../optimizer_tracking/index.md) Learn to track and analyze optimizer performance and behavior. Understand how to monitor optimization processes and enhance the reproducibility of the optimization. 
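To give a flavor of the workflows linked above, here is a minimal save/load sketch related to the Saving and Loading tutorial. It is only a sketch: `qa_program.json` is a placeholder path, and the tutorial covers whole-program saving, versioning, and checkpoint management in more depth.

```python
import dspy

# A toy module standing in for your optimized program.
qa = dspy.ChainOfThought("question -> answer")

# Persist the module's learned state (e.g., few-shot demos) to disk.
qa.save("qa_program.json")

# Later: rebuild the same architecture and load the saved state into it.
restored = dspy.ChainOfThought("question -> answer")
restored.load("qa_program.json")
```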
``` -------------------------------------------------------------------------------- /docs/docs/learn/programming/overview.md: -------------------------------------------------------------------------------- ```markdown --- sidebar_position: 1 --- # Programming in DSPy DSPy is a bet on _writing code instead of strings_. In other words, building the right control flow is crucial. Start by **defining your task**. What are the inputs to your system and what should your system produce as output? Is it a chatbot over your data or perhaps a code assistant? Or maybe a system for translation, for highlighting snippets from search results, or for generating reports with citations? Next, **define your initial pipeline**. Can your DSPy program just be a single module or do you need to break it down into a few steps? Do you need retrieval or other tools, like a calculator or a calendar API? Is there a typical workflow for solving your problem in multiple well-scoped steps, or do you want more open-ended tool use with agents for your task? Think about these but start simple, perhaps with just a single `dspy.ChainOfThought` module, then add complexity incrementally based on observations. As you do this, **craft and try a handful of examples** of the inputs to your program. Consider using a powerful LM at this point, or a couple of different LMs, just to understand what's possible. Record interesting (both easy and hard) examples you try. This will be useful when you are doing evaluation and optimization later. ??? "Beyond encouraging good design patterns, how does DSPy help here?" Conventional prompts couple your fundamental system architecture with incidental choices not portable to new LMs, objectives, or pipelines. A conventional prompt asks the LM to take some inputs and produce some outputs of certain types (a _signature_), formats the inputs in certain ways and requests outputs in a form it can parse accurately (an _adapter_), asks the LM to apply certain strategies like "thinking step by step" or using tools (a _module_'s logic), and relies on substantial trial-and-error to discover the right way to ask each LM to do this (a form of manual _optimization_). DSPy separates these concerns and automates the lower-level ones until you need to consider them. This allow you to write much shorter code, with much higher portability. For example, if you write a program using DSPy modules, you can swap the LM or its adapter without changing the rest of your logic. Or you can exchange one _module_, like `dspy.ChainOfThought`, with another, like `dspy.ProgramOfThought`, without modifying your signatures. When you're ready to use optimizers, the same program can have its prompts optimized or its LM weights fine-tuned. ``` -------------------------------------------------------------------------------- /dspy/utils/usage_tracker.py: -------------------------------------------------------------------------------- ```python """Usage tracking utilities for DSPy.""" from collections import defaultdict from contextlib import contextmanager from typing import Any, Generator from dspy.dsp.utils.settings import settings class UsageTracker: """Tracks LM usage data within a context.""" def __init__(self): # Map of LM name to list of usage entries. 
For example: # { # "openai/gpt-4o-mini": [ # {"prompt_tokens": 100, "completion_tokens": 200}, # {"prompt_tokens": 300, "completion_tokens": 400}, # ], # } self.usage_data = defaultdict(list) def _flatten_usage_entry(self, usage_entry: dict[str, Any]) -> dict[str, Any]: result = dict(usage_entry) if completion_tokens_details := result.get("completion_tokens_details"): result["completion_tokens_details"] = dict(completion_tokens_details) if prompt_tokens_details := result.get("prompt_tokens_details"): result["prompt_tokens_details"] = dict(prompt_tokens_details) return result def _merge_usage_entries(self, usage_entry1: dict[str, Any] | None, usage_entry2: dict[str, Any] | None) -> dict[str, Any]: if usage_entry1 is None or len(usage_entry1) == 0: return dict(usage_entry2) if usage_entry2 is None or len(usage_entry2) == 0: return dict(usage_entry1) result = dict(usage_entry2) for k, v in usage_entry1.items(): current_v = result.get(k) if isinstance(v, dict) or isinstance(current_v, dict): result[k] = self._merge_usage_entries(current_v, v) else: result[k] = (current_v or 0) + (v or 0) return result def add_usage(self, lm: str, usage_entry: dict[str, Any]) -> None: """Add a usage entry to the tracker.""" if len(usage_entry) > 0: self.usage_data[lm].append(self._flatten_usage_entry(usage_entry)) def get_total_tokens(self) -> dict[str, dict[str, Any]]: """Calculate total tokens from all tracked usage.""" total_usage_by_lm = {} for lm, usage_entries in self.usage_data.items(): total_usage = {} for usage_entry in usage_entries: total_usage = self._merge_usage_entries(total_usage, usage_entry) total_usage_by_lm[lm] = total_usage return total_usage_by_lm @contextmanager def track_usage() -> Generator[UsageTracker, None, None]: """Context manager for tracking LM usage.""" tracker = UsageTracker() with settings.context(usage_tracker=tracker): yield tracker ``` -------------------------------------------------------------------------------- /dspy/teleprompt/signature_opt.py: -------------------------------------------------------------------------------- ```python from .copro_optimizer import COPRO """ =============================================================== DEPRECATED!!! PLEASE USE COPRO INSTEAD. =============================================================== USAGE SUGGESTIONS: The following code can be used to compile a optimized signature teleprompter, and evaluate it on an end task: teleprompter = SignatureOptimizer(prompt_model=prompt_model, metric=metric, breadth=BREADTH, depth=DEPTH, init_temperature=INIT_TEMPERATURE) kwargs = dict(num_threads=NUM_THREADS, display_progress=True, display_table=0) compiled_prompt_opt = teleprompter.compile(program.deepcopy(), devset=devset[:DEV_NUM], eval_kwargs=kwargs) eval_score = evaluate(compiled_prompt_opt, devset=evalset[:EVAL_NUM], **kwargs) Note that this teleprompter takes in the following parameters: * prompt_model: The model used for prompt generation. When unspecified, defaults to the model set in settings (ie. dspy.settings.configure(lm=task_model)). * metric: The task metric used for optimization. * breadth: The number of new prompts to generate at each iteration. Default=10. * depth: The number of times we should ask our prompt model to generate new prompts, with the history of the past prompts as input. Default=3. * init_temperature: The temperature used to generate new prompts. Higher roughly equals more creative. Default=1.4. * verbose: Tells the method whether or not to print intermediate steps. 
* track_stats: Tells the method whether or not to track statistics about the optimization process. If True, the method will track the following statistics: * results_best: The min,max,avg,stddev of top 10 scores for each predictor at each depth. * results_latest: The min,max,avg,stddev of newest prompt scores for each predictor at each depth. * total_calls: The total number of calls to the task metric. These statistics will be returned as attributes of the best program. """ class SignatureOptimizer(COPRO): def __init__( self, prompt_model=None, metric=None, breadth=10, depth=3, init_temperature=1.4, verbose=False, track_stats=False, ): print( "\u001b[31m[WARNING] SignatureOptimizer has been deprecated and replaced with COPRO. SignatureOptimizer will be removed in a future release. \u001b[31m", ) super().__init__(prompt_model, metric, breadth, depth, init_temperature, verbose, track_stats) def compile(self, student, *, devset, eval_kwargs): return super().compile(student, trainset=devset, eval_kwargs=eval_kwargs) ``` -------------------------------------------------------------------------------- /docs/docs/tutorials/index.md: -------------------------------------------------------------------------------- ```markdown Welcome to DSPy tutorials! We've organized our tutorials into three main categories to help you get started: - **Build AI Programs with DSPy**: These hands-on tutorials guide you through building production-ready AI applications. From implementing RAG systems to creating intelligent agents, each tutorial demonstrates practical use cases. You'll also learn how to leverage DSPy optimizers to enhance your program's performance. - **Optimize AI Programs with DSPy Optimizers**: These tutorials deep dive into DSPy's optimization capabilities. While lighter on programming concepts, they focus on how to systematically improve your AI programs using DSPy optimizers, and showcase how DSPy optimizers help improve the quality automatically. - **DSPy Core Development**: These tutorials cover essential DSPy features and best practices. Learn how to implement key functionalities like streaming, caching, deployment, and monitoring in your DSPy applications. 
- Build AI Programs with DSPy - [Managing Conversation History](conversation_history/index.md) - [Building AI Agents with DSPy](customer_service_agent/index.ipynb) - [Building AI Applications by Customizing DSPy Modules](custom_module/index.ipynb) - [Retrieval-Augmented Generation (RAG)](rag/index.ipynb) - [Building RAG as Agent](agents/index.ipynb) - [Entity Extraction](entity_extraction/index.ipynb) - [Classification](classification/index.md) - [Multi-Hop RAG](multihop_search/index.ipynb) - [Privacy-Conscious Delegation](papillon/index.md) - [Program Of Thought](program_of_thought/index.ipynb) - [Image Generation Prompt iteration](image_generation_prompting/index.ipynb) - [Audio](audio/index.ipynb) - Optimize AI Programs with DSPy - [Math Reasoning](math/index.ipynb) - [Classification Finetuning](classification_finetuning/index.ipynb) - [Advanced Tool Use](tool_use/index.ipynb) - [Finetuning Agents](games/index.ipynb) - Reflective Prompt Evolution with dspy.GEPA: - [Overview](gepa_ai_program/index.md) - [GEPA for AIME](gepa_aime/index.ipynb) - [GEPA for PAPILLON](gepa_papillon/index.ipynb) - [GEPA for Enterprise classification task](gepa_facilitysupportanalyzer/index.ipynb) - Tools, Development, and Deployment - [Use MCP in DSPy](mcp/index.md) - [Output Refinement](output_refinement/best-of-n-and-refine.md) - [Saving and Loading](saving/index.md) - [Cache](cache/index.md) - [Deployment](deployment/index.md) - [Debugging & Observability](observability/index.md) - [Tracking DSPy Optimizers](optimizer_tracking/index.md) - [Streaming](streaming/index.md) - [Async](async/index.md) ``` -------------------------------------------------------------------------------- /tests/test_utils/server/__init__.py: -------------------------------------------------------------------------------- ```python import json import os import socket import subprocess import tempfile import time from typing import Any import pytest from tests.test_utils.server.litellm_server import LITELLM_TEST_SERVER_LOG_FILE_PATH_ENV_VAR @pytest.fixture() def litellm_test_server() -> tuple[str, str]: """ Start a LiteLLM test server for a DSPy integration test case, and tear down the server when the test case completes. """ with tempfile.TemporaryDirectory() as server_log_dir_path: # Create a server log file used to store request logs server_log_file_path = os.path.join(server_log_dir_path, "request_logs.jsonl") open(server_log_file_path, "a").close() port = _get_random_port() host = "127.0.0.1" print(f"Starting LiteLLM proxy server on port {port}") process = subprocess.Popen( ["litellm", "--host", host, "--port", str(port), "--config", _get_litellm_config_path()], env={LITELLM_TEST_SERVER_LOG_FILE_PATH_ENV_VAR: server_log_file_path, **os.environ.copy()}, text=True, ) try: _wait_for_port(host=host, port=port) except TimeoutError as e: process.terminate() raise e server_url = f"http://{host}:{port}" yield server_url, server_log_file_path process.kill() process.wait() def read_litellm_test_server_request_logs(server_log_file_path: str) -> list[dict[str, Any]]: """ Read request logs from a LiteLLM server used during DSPy integration tests. Args: server_log_file_path: The filesystem path to the LiteLLM server request logs jsonlines file. Return: A list of log entries, where each entry corresponds to one request handled by the server. 
""" data = [] with open(server_log_file_path) as f: for line in f: data.append(json.loads(line)) return data def _get_litellm_config_path(): module_dir = os.path.dirname(os.path.abspath(__file__)) return os.path.join(module_dir, "litellm_server_config.yaml") def _get_random_port(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(("", 0)) return s.getsockname()[1] def _wait_for_port(host, port, timeout=10): start_time = time.time() while time.time() - start_time < timeout: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: try: sock.connect((host, port)) return True except ConnectionRefusedError: time.sleep(0.5) # Wait briefly before trying again raise TimeoutError(f"Server on port {port} did not become ready within {timeout} seconds.") ``` -------------------------------------------------------------------------------- /tests/reliability/reliability_conf.yaml: -------------------------------------------------------------------------------- ```yaml adapter: chat model_list: # The model to use for judging the correctness of program # outputs throughout reliability test suites. We recommend using # a high quality model as the judge, such as OpenAI GPT-4o - model_name: "judge" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "gpt-4o" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "gpt-4o-mini" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "gpt-4-turbo" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "gpt-o1" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "gpt-o1-mini" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "claude-3.5-sonnet" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "claude-3.5-haiku" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "gemini-1.5-pro" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "gemini-1.5-flash" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "llama-3.1-405b-instruct" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "llama-3.1-70b-instruct" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "llama-3.1-8b-instruct" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "llama-3.2-3b-instruct" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # api_base: "<api_base>" - model_name: "deepseek-r1" litellm_params: # model: "<litellm_provider>/<litellm_model_name>" # api_key: "api key" # max_tokens: 10000 ``` -------------------------------------------------------------------------------- /tests/reliability/conftest.py: 
-------------------------------------------------------------------------------- ```python import os import pytest import dspy from ..conftest import clear_settings from ..reliability.utils import get_adapter, parse_reliability_conf_yaml # Standard list of models that should be used for periodic DSPy reliability testing MODEL_LIST = [ "gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-o1-preview", "gpt-o1-mini", "claude-3.5-sonnet", "claude-3.5-haiku", "gemini-1.5-pro", "gemini-1.5-flash", "llama-3.1-405b-instruct", "llama-3.1-70b-instruct", "llama-3.1-8b-instruct", "llama-3.2-3b-instruct", "deepseek-r1", ] def pytest_generate_tests(metafunc): """ Hook to parameterize reliability test cases with each model defined in the reliability tests YAML configuration """ known_failing_models = getattr(metafunc.function, "_known_failing_models", []) if "configure_model" in metafunc.fixturenames: params = [(model, model in known_failing_models) for model in MODEL_LIST] ids = [f"{model}" for model, _ in params] # Custom IDs for display metafunc.parametrize("configure_model", params, indirect=True, ids=ids) @pytest.fixture(autouse=True) def configure_model(request): """ Fixture to configure the DSPy library with a particular configured model and adapter before executing a test case. """ module_dir = os.path.dirname(os.path.abspath(__file__)) conf_path = os.path.join(module_dir, "reliability_conf.yaml") reliability_conf = parse_reliability_conf_yaml(conf_path) adapter = get_adapter(reliability_conf) model_name, should_ignore_failure = request.param model_params = reliability_conf.models.get(model_name) if model_params: lm = dspy.LM(**model_params) dspy.configure(lm=lm, adapter=adapter) else: pytest.skip( f"Skipping test because no reliability testing YAML configuration was found" f" for model {model_name}, or the YAML configuration is missing LiteLLM parameters" f" for this model ('litellm_params' section of conf file is missing)." ) # Store `should_ignore_failure` flag on the request node for use in post-test handling request.node.should_ignore_failure = should_ignore_failure request.node.model_name = model_name @pytest.hookimpl(tryfirst=True, hookwrapper=True) def pytest_runtest_makereport(item, call): """ Hook to conditionally ignore failures in a given test case for known failing models. """ outcome = yield rep = outcome.get_result() should_ignore_failure = getattr(item, "should_ignore_failure", False) if should_ignore_failure and rep.failed: rep.outcome = "passed" rep.wasxfail = "Ignoring failure for known failing model" ``` -------------------------------------------------------------------------------- /dspy/teleprompt/knn_fewshot.py: -------------------------------------------------------------------------------- ```python import types from typing import Any from dspy.clients import Embedder from dspy.predict.knn import KNN from dspy.primitives import Example from dspy.teleprompt import BootstrapFewShot from dspy.teleprompt.teleprompt import Teleprompter class KNNFewShot(Teleprompter): def __init__(self, k: int, trainset: list[Example], vectorizer: Embedder, **few_shot_bootstrap_args: dict[str, Any]): """ KNNFewShot is an optimizer that uses an in-memory KNN retriever to find the k nearest neighbors in a trainset at test time. For each input example in a forward call, it identifies the k most similar examples from the trainset and attaches them as demonstrations to the student module. Args: k: The number of nearest neighbors to attach to the student model. 
trainset: The training set to use for few-shot prompting. vectorizer: The `Embedder` to use for vectorization **few_shot_bootstrap_args: Additional arguments for the `BootstrapFewShot` optimizer. Example: ```python import dspy from sentence_transformers import SentenceTransformer # Define a QA module with chain of thought qa = dspy.ChainOfThought("question -> answer") # Create a training dataset with examples trainset = [ dspy.Example(question="What is the capital of France?", answer="Paris").with_inputs("question"), # ... more examples ... ] # Initialize KNNFewShot with a sentence transformer model knn_few_shot = KNNFewShot( k=3, trainset=trainset, vectorizer=dspy.Embedder(SentenceTransformer("all-MiniLM-L6-v2").encode) ) # Compile the QA module with few-shot learning compiled_qa = knn_few_shot.compile(qa) # Use the compiled module result = compiled_qa("What is the capital of Belgium?") ``` """ self.KNN = KNN(k, trainset, vectorizer=vectorizer) self.few_shot_bootstrap_args = few_shot_bootstrap_args def compile(self, student, *, teacher=None): student_copy = student.reset_copy() def forward_pass(_, **kwargs): knn_trainset = self.KNN(**kwargs) few_shot_bootstrap = BootstrapFewShot(**self.few_shot_bootstrap_args) compiled_program = few_shot_bootstrap.compile( student, teacher=teacher, trainset=knn_trainset, ) return compiled_program(**kwargs) student_copy.forward = types.MethodType(forward_pass, student_copy) return student_copy ``` -------------------------------------------------------------------------------- /tests/teleprompt/test_knn_fewshot.py: -------------------------------------------------------------------------------- ```python import pytest import dspy from dspy.teleprompt.knn_fewshot import KNNFewShot from dspy.utils.dummies import DummyLM, DummyVectorizer def mock_example(question: str, answer: str) -> dspy.Example: """Creates a mock DSP example with specified question and answer.""" return dspy.Example(question=question, answer=answer).with_inputs("question") @pytest.fixture def setup_knn_few_shot() -> KNNFewShot: """Sets up a KNNFewShot instance for testing.""" trainset = [ mock_example("What is the capital of France?", "Paris"), mock_example("What is the largest ocean?", "Pacific"), mock_example("What is 2+2?", "4"), ] return KNNFewShot(k=2, trainset=trainset, vectorizer=dspy.Embedder(DummyVectorizer())) def test_knn_few_shot_initialization(setup_knn_few_shot): """Tests the KNNFewShot initialization.""" knn_few_shot = setup_knn_few_shot assert knn_few_shot.KNN.k == 2, "Incorrect k value for KNN" assert len(knn_few_shot.KNN.trainset) == 3, "Incorrect trainset size for KNN" class SimpleModule(dspy.Module): def __init__(self, signature): super().__init__() self.predictor = dspy.Predict(signature) def forward(self, *args, **kwargs): return self.predictor(**kwargs) def reset_copy(self): # Creates a new instance of SimpleModule with the same predictor return SimpleModule(self.predictor.signature) # TODO: Test not working yet def _test_knn_few_shot_compile(setup_knn_few_shot): """Tests the compile method of KNNFewShot with SimpleModule as student.""" student = SimpleModule("input -> output") teacher = SimpleModule("input -> output") # Assuming teacher uses the same module type # Setup DummyLM with a response for a query similar to one of the training examples lm = DummyLM(["Madrid", "10"]) dspy.settings.configure(lm=lm) # Responses for the capital of Spain and the result of 5+5) knn_few_shot = setup_knn_few_shot trainset = knn_few_shot.KNN.trainset compiled_student = 
knn_few_shot.compile(student, teacher=teacher, trainset=trainset, valset=None) assert len(compiled_student.predictor.demos) == 1 assert compiled_student.predictor.demos[0].input == trainset[0].input assert compiled_student.predictor.demos[0].output == trainset[0].output # Simulate a query that is similar to one of the training examples output = compiled_student.forward(input="What is the capital of Spain?").output # Validate that the output corresponds to one of the expected DummyLM responses # This assumes the compiled_student's forward method will execute the predictor with the given query assert output in ["Madrid", "10"], "The compiled student did not return the correct output based on the query" ``` -------------------------------------------------------------------------------- /dspy/utils/annotation.py: -------------------------------------------------------------------------------- ```python import inspect import re import types from typing import Callable, ParamSpec, TypeVar, overload P = ParamSpec("P") R = TypeVar("R") @overload def experimental(f: Callable[P, R], version: str | None = None) -> Callable[P, R]: ... @overload def experimental(f: None = None, version: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]]: ... def experimental( f: Callable[P, R] | None = None, version: str | None = None, ) -> Callable[[Callable[P, R]], Callable[P, R]]: """Decorator / decorator creator for marking APIs experimental in the docstring. Args: f: The function to be decorated. version: The version in which the API was introduced as experimental. The version is used to determine whether the API should be considered as stable or not when releasing a new version of DSPy. Returns: A decorator that adds a note to the docstring of the decorated API. """ if f: return _experimental(f, version) else: def decorator(f: Callable[P, R]) -> Callable[P, R]: return _experimental(f, version) return decorator def _experimental(api: Callable[P, R], version: str | None = None) -> Callable[P, R]: """Add experimental notice to the API's docstring.""" if inspect.isclass(api): api_type = "class" elif inspect.isfunction(api): api_type = "function" elif isinstance(api, property): api_type = "property" elif isinstance(api, types.MethodType): api_type = "method" else: api_type = str(type(api)) indent = _get_min_indent_of_docstring(api.__doc__) if api.__doc__ else "" version_text = f" (introduced in v{version})" if version else "" notice = ( indent + f"Experimental: This {api_type} may change or " f"be removed in a future release without warning{version_text}." ) if api_type == "property": api.__doc__ = api.__doc__ + "\n\n" + notice if api.__doc__ else notice else: if api.__doc__: api.__doc__ = notice + "\n\n" + api.__doc__ else: api.__doc__ = notice return api def _get_min_indent_of_docstring(docstring_str: str) -> str: """ Get the minimum indentation string of a docstring, based on the assumption that the closing triple quote for multiline comments must be on a new line. Note that based on ruff rule D209, the closing triple quote for multiline comments must be on a new line. Args: docstring_str: string with docstring Returns: Whitespace corresponding to the indent of a docstring. 
""" if not docstring_str or "\n" not in docstring_str: return "" match = re.match(r"^\s*", docstring_str.rsplit("\n", 1)[-1]) return match.group() if match else "" ``` -------------------------------------------------------------------------------- /tests/teleprompt/test_bootstrap_finetune.py: -------------------------------------------------------------------------------- ```python from unittest.mock import patch import dspy from dspy import Example from dspy.predict import Predict from dspy.teleprompt import BootstrapFinetune from dspy.utils.dummies import DummyLM # Define a simple metric function for testing def simple_metric(example, prediction, trace=None): return example.output == prediction.output examples = [ Example(input="What is the color of the sky?", output="blue").with_inputs("input"), Example(input="What does the fox say?", output="Ring-ding-ding-ding-dingeringeding!").with_inputs("input"), ] trainset = [examples[0]] def test_bootstrap_finetune_initialization(): """Test BootstrapFinetune initialization with various parameters.""" bootstrap = BootstrapFinetune(metric=simple_metric) assert bootstrap.metric == simple_metric, "Metric not correctly initialized" assert bootstrap.multitask == True, "Multitask should default to True" class SimpleModule(dspy.Module): def __init__(self, signature): super().__init__() self.predictor = Predict(signature) def forward(self, **kwargs): return self.predictor(**kwargs) def test_compile_with_predict_instances(): """Test BootstrapFinetune compilation with Predict instances.""" # Create SimpleModule instances for student and teacher student = SimpleModule("input -> output") teacher = SimpleModule("input -> output") lm = DummyLM([{"output": "blue"}, {"output": "Ring-ding-ding-ding-dingeringeding!"}]) dspy.settings.configure(lm=lm) # Set LM for both student and teacher student.set_lm(lm) teacher.set_lm(lm) bootstrap = BootstrapFinetune(metric=simple_metric) # Mock the fine-tuning process since DummyLM doesn't support it with patch.object(bootstrap, "finetune_lms") as mock_finetune: mock_finetune.return_value = {(lm, None): lm} compiled_student = bootstrap.compile(student, teacher=teacher, trainset=trainset) assert compiled_student is not None, "Failed to compile student" assert hasattr(compiled_student, "_compiled") and compiled_student._compiled, "Student compilation flag not set" mock_finetune.assert_called_once() def test_error_handling_missing_lm(): """Test error handling when predictor doesn't have an LM assigned.""" lm = DummyLM([{"output": "test"}]) dspy.settings.configure(lm=lm) student = SimpleModule("input -> output") # Intentionally NOT setting LM for the student module bootstrap = BootstrapFinetune(metric=simple_metric) # This should raise ValueError about missing LM and hint to use set_lm try: bootstrap.compile(student, trainset=trainset) assert False, "Should have raised ValueError for missing LM" except ValueError as e: assert "does not have an LM assigned" in str(e) assert "set_lm" in str(e) ``` -------------------------------------------------------------------------------- /dspy/utils/inspect_history.py: -------------------------------------------------------------------------------- ```python def _green(text: str, end: str = "\n"): return "\x1b[32m" + str(text).lstrip() + "\x1b[0m" + end def _red(text: str, end: str = "\n"): return "\x1b[31m" + str(text) + "\x1b[0m" + end def _blue(text: str, end: str = "\n"): return "\x1b[34m" + str(text) + "\x1b[0m" + end def pretty_print_history(history, n: int = 1): """Prints the last n 
prompts and their completions.""" for item in history[-n:]: messages = item["messages"] or [{"role": "user", "content": item["prompt"]}] outputs = item["outputs"] timestamp = item.get("timestamp", "Unknown time") print("\n\n\n") print("\x1b[34m" + f"[{timestamp}]" + "\x1b[0m" + "\n") for msg in messages: print(_red(f"{msg['role'].capitalize()} message:")) if isinstance(msg["content"], str): print(msg["content"].strip()) else: if isinstance(msg["content"], list): for c in msg["content"]: if c["type"] == "text": print(c["text"].strip()) elif c["type"] == "image_url": image_str = "" if "base64" in c["image_url"].get("url", ""): len_base64 = len(c["image_url"]["url"].split("base64,")[1]) image_str = ( f"<{c['image_url']['url'].split('base64,')[0]}base64," f"<IMAGE BASE 64 ENCODED({len_base64!s})>" ) else: image_str = f"<image_url: {c['image_url']['url']}>" print(_blue(image_str.strip())) elif c["type"] == "input_audio": audio_format = c["input_audio"]["format"] len_audio = len(c["input_audio"]["data"]) audio_str = f"<audio format='{audio_format}' base64-encoded, length={len_audio}>" print(_blue(audio_str.strip())) print("\n") if isinstance(outputs[0], dict): if outputs[0]["text"]: print(_red("Response:")) print(_green(outputs[0]["text"].strip())) if outputs[0].get("tool_calls"): print(_red("Tool calls:")) for tool_call in outputs[0]["tool_calls"]: print(_green(f"{tool_call['function']['name']}: {tool_call['function']['arguments']}")) else: print(_red("Response:")) print(_green(outputs[0].strip())) if len(outputs) > 1: choices_text = f" \t (and {len(outputs) - 1} other completions)" print(_red(choices_text, end="")) print("\n\n\n") ``` -------------------------------------------------------------------------------- /tests/predict/test_refine.py: -------------------------------------------------------------------------------- ```python import pytest import dspy from dspy.predict.predict import Predict from dspy.predict.refine import Refine from dspy.primitives.prediction import Prediction from dspy.utils.dummies import DummyLM class DummyModule(dspy.Module): def __init__(self, signature, forward_fn): super().__init__() self.predictor = Predict(signature) self.forward_fn = forward_fn def forward(self, **kwargs) -> Prediction: return self.forward_fn(self, **kwargs) def test_refine_forward_success_first_attempt(): lm = DummyLM([{"answer": "Brussels"}, {"answer": "City of Brussels"}, {"answer": "Brussels"}]) dspy.settings.configure(lm=lm) module_call_count = [0] def count_calls(self, **kwargs): module_call_count[0] += 1 return self.predictor(**kwargs) reward_call_count = [0] def reward_fn(kwargs, pred: Prediction) -> float: reward_call_count[0] += 1 # The answer should always be one word. 
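        # Note: this compares character length, so a real answer like "Brussels" scores 0.0;
        # the reward never reaches the 1.0 threshold, which matches the assertion below that
        # the module runs exactly N (=3) times.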
return 1.0 if len(pred.answer) == 1 else 0.0 predict = DummyModule("question -> answer", count_calls) refine = Refine(module=predict, N=3, reward_fn=reward_fn, threshold=1.0) result = refine(question="What is the capital of Belgium?") assert result.answer == "Brussels", "Result should be `Brussels`" assert reward_call_count[0] > 0, "Reward function should have been called" assert module_call_count[0] == 3, ( "Module should have been called exactly 3 times, but was called %d times" % module_call_count[0] ) def test_refine_module_default_fail_count(): lm = DummyLM([{"answer": "Brussels"}, {"answer": "City of Brussels"}, {"answer": "Brussels"}]) dspy.settings.configure(lm=lm) def always_raise(self, **kwargs): raise ValueError("Deliberately failing") predict = DummyModule("question -> answer", always_raise) refine = Refine(module=predict, N=3, reward_fn=lambda _, __: 1.0, threshold=0.0) with pytest.raises(ValueError): refine(question="What is the capital of Belgium?") def test_refine_module_custom_fail_count(): lm = DummyLM([{"answer": "Brussels"}, {"answer": "City of Brussels"}, {"answer": "Brussels"}]) dspy.settings.configure(lm=lm) module_call_count = [0] def raise_on_second_call(self, **kwargs): if module_call_count[0] < 2: module_call_count[0] += 1 raise ValueError("Deliberately failing") return self.predictor(**kwargs) predict = DummyModule("question -> answer", raise_on_second_call) refine = Refine(module=predict, N=3, reward_fn=lambda _, __: 1.0, threshold=0.0, fail_count=1) with pytest.raises(ValueError): refine(question="What is the capital of Belgium?") assert module_call_count[0] == 2, ( "Module should have been called exactly 2 times, but was called %d times" % module_call_count[0] ) ``` -------------------------------------------------------------------------------- /tests/predict/test_best_of_n.py: -------------------------------------------------------------------------------- ```python import pytest import dspy from dspy.predict.best_of_n import BestOfN from dspy.predict.predict import Predict from dspy.primitives.prediction import Prediction from dspy.utils.dummies import DummyLM class DummyModule(dspy.Module): def __init__(self, signature, forward_fn): super().__init__() self.predictor = Predict(signature) self.forward_fn = forward_fn def forward(self, **kwargs) -> Prediction: return self.forward_fn(self, **kwargs) def test_refine_forward_success_first_attempt(): lm = DummyLM([{"answer": "Brussels"}, {"answer": "City of Brussels"}, {"answer": "Brussels"}]) dspy.settings.configure(lm=lm) module_call_count = [0] def count_calls(self, **kwargs): module_call_count[0] += 1 return self.predictor(**kwargs) reward_call_count = [0] def reward_fn(kwargs, pred: Prediction) -> float: reward_call_count[0] += 1 # The answer should always be one word. 
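        # As in the Refine test, this checks character length rather than word count, so the
        # reward stays below the threshold and BestOfN tries all N attempts.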
return 1.0 if len(pred.answer) == 1 else 0.0 predict = DummyModule("question -> answer", count_calls) best_of_n = BestOfN(module=predict, N=3, reward_fn=reward_fn, threshold=1.0) result = best_of_n(question="What is the capital of Belgium?") assert result.answer == "Brussels", "Result should be `Brussels`" assert reward_call_count[0] > 0, "Reward function should have been called" assert module_call_count[0] == 3, ( "Module should have been called exactly 3 times, but was called %d times" % module_call_count[0] ) def test_refine_module_default_fail_count(): lm = DummyLM([{"answer": "Brussels"}, {"answer": "City of Brussels"}, {"answer": "Brussels"}]) dspy.settings.configure(lm=lm) def always_raise(self, **kwargs): raise ValueError("Deliberately failing") predict = DummyModule("question -> answer", always_raise) best_of_n = BestOfN(module=predict, N=3, reward_fn=lambda _, __: 1.0, threshold=0.0) with pytest.raises(ValueError): best_of_n(question="What is the capital of Belgium?") def test_refine_module_custom_fail_count(): lm = DummyLM([{"answer": "Brussels"}, {"answer": "City of Brussels"}, {"answer": "Brussels"}]) dspy.settings.configure(lm=lm) module_call_count = [0] def raise_on_second_call(self, **kwargs): if module_call_count[0] < 2: module_call_count[0] += 1 raise ValueError("Deliberately failing") return self.predictor(**kwargs) predict = DummyModule("question -> answer", raise_on_second_call) best_of_n = BestOfN(module=predict, N=3, reward_fn=lambda _, __: 1.0, threshold=0.0, fail_count=1) with pytest.raises(ValueError): best_of_n(question="What is the capital of Belgium?") assert module_call_count[0] == 2, ( "Module should have been called exactly 2 times, but was called %d times" % module_call_count[0] ) ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_many_types_1/program.py: -------------------------------------------------------------------------------- ```python ### Input models ### from datetime import datetime from enum import Enum from typing import List, Tuple from pydantic import BaseModel, Field class EnumField(Enum): option1 = "option1" option2 = "option2" option3 = "option3" class LiteralField(Enum): literalValue = "literalValue" class ObjectField(BaseModel): subField1: str subField2: float class NestedObjectField(BaseModel): tupleField: Tuple[str, float] enumField: EnumField datetimeField: datetime literalField: LiteralField class ProgramInputs(BaseModel): tupleField: Tuple[str, float] enumField: EnumField datetimeField: datetime literalField: LiteralField objectField: ObjectField nestedObjectField: NestedObjectField ### Output models ### from datetime import datetime from enum import Enum from typing import List, Tuple, Union from pydantic import BaseModel, Field class ProcessedEnumField(Enum): option1 = "option1" option2 = "option2" option3 = "option3" class ProcessedLiteralField(Enum): literalValue = "literalValue" class ProcessedObjectField(BaseModel): subField1: str subField2: float additionalField: bool class EnumField(Enum): option1 = "option1" option2 = "option2" option3 = "option3" class LiteralField(Enum): literalValue = "literalValue" class ProcessedNestedObjectField(BaseModel): tupleField: Tuple[str, float] enumField: EnumField datetimeField: datetime literalField: LiteralField additionalField: bool class ProgramOutputs(BaseModel): processedTupleField: Tuple[str, float] processedEnumField: ProcessedEnumField processedDatetimeField: datetime processedLiteralField: 
ProcessedLiteralField processedObjectField: ProcessedObjectField processedNestedObjectField: ProcessedNestedObjectField ### Program definition ### import dspy class BaseSignature(dspy.Signature): """ The program is designed to process various data types including tuples, enums, datetime values, literals, objects, and nested objects containing these types. The program will accept inputs of these types, perform specified operations on them, and return the results. The operations could include validation, transformation, and extraction of information from these inputs. """ program_signature = BaseSignature for input_field_name, input_field in ProgramInputs.model_fields.items(): program_signature = program_signature.append( name=input_field_name, field=dspy.InputField(description=input_field.description), type_=input_field.annotation, ) for output_field_name, output_field in ProgramOutputs.model_fields.items(): program_signature = program_signature.append( name=output_field_name, field=dspy.OutputField(description=input_field.description), type_=output_field.annotation, ) program = dspy.Predict(program_signature) ``` -------------------------------------------------------------------------------- /tests/clients/test_databricks.py: -------------------------------------------------------------------------------- ```python """Test the Databricks finetuning and deployment. This test requires valid Databricks credentials, so it is skipped on github actions. Right now it is only used for manual testing. """ import pytest import dspy from dspy.clients.databricks import ( DatabricksProvider, TrainingJobDatabricks, _create_directory_in_databricks_unity_catalog, ) try: from databricks.sdk import WorkspaceClient WorkspaceClient() except (ImportError, Exception): # Skip the test if the Databricks SDK is not configured or credentials are not available. pytestmark = pytest.mark.skip(reason="Databricks SDK not configured or credentials not available") def test_create_directory_in_databricks_unity_catalog(): from databricks.sdk import WorkspaceClient w = WorkspaceClient() with pytest.raises( ValueError, match=( "Databricks Unity Catalog path must be in the format '/Volumes/<catalog>/<schema>/<volume>/...', " "but received: /badstring/whatever" ), ): _create_directory_in_databricks_unity_catalog(w, "/badstring/whatever") _create_directory_in_databricks_unity_catalog(w, "/Volumes/main/chenmoney/testing/dspy_testing") # Check that the directory was created successfully, otherwise `get_directory_metadata` will raise an exception. 
w.files.get_directory_metadata("/Volumes/main/chenmoney/testing/dspy_testing") def test_create_finetuning_job(): fake_training_data = [ { "messages": [ {"role": "user", "content": "Hello, how are you?"}, {"role": "assistant", "content": "I'm doing great, thank you!"}, ] }, { "messages": [ {"role": "user", "content": "What is the capital of France?"}, {"role": "assistant", "content": "Paris!"}, ] }, { "messages": [ {"role": "user", "content": "What is the capital of Germany?"}, {"role": "assistant", "content": "Berlin!"}, ] }, ] dspy.settings.experimental = True job = TrainingJobDatabricks() DatabricksProvider.finetune( job=job, model="meta-llama/Llama-3.2-1B", train_data=fake_training_data, data_format="chat", train_kwargs={ "train_data_path": "/Volumes/main/chenmoney/testing/dspy_testing", "register_to": "main.chenmoney.finetuned_model", "task_type": "CHAT_COMPLETION", "skip_deploy": True, }, ) assert job.finetuning_run.status.display_name is not None def test_deploy_finetuned_model(): dspy.settings.experimental = True model_to_deploy = "main.chenmoney.finetuned_model" DatabricksProvider.deploy_finetuned_model( model=model_to_deploy, data_format="chat", ) lm = dspy.LM(model="databricks/main_chenmoney_finetuned_model") lm("what is 2 + 2?") ``` -------------------------------------------------------------------------------- /dspy/predict/retry.py: -------------------------------------------------------------------------------- ```python # import copy # import dspy # from .predict import Predict # class Retry(Predict): # def __init__(self, module): # super().__init__(module.signature) # self.module = module # self.original_signature = module.signature # self.original_forward = module.forward # self.new_signature = self._create_new_signature(self.original_signature) # def _create_new_signature(self, signature): # # Add "Past" input fields for each output field # for key, value in signature.output_fields.items(): # actual_prefix = value.json_schema_extra["prefix"].split(":")[0] + ":" # signature = signature.append(f"past_{key}", dspy.InputField( # prefix="Previous " + actual_prefix, # desc=f"past {actual_prefix[:-1]} with errors", # format=value.json_schema_extra.get("format"), # )) # signature = signature.append("feedback", dspy.InputField( # prefix="Instructions:", # desc="Some instructions you must satisfy", # format=str, # )) # return signature # def forward(self, *, past_outputs, **kwargs): # # Take into account the possible new signature, as in TypedPredictor # new_signature = kwargs.pop("new_signature", None) # if new_signature: # self.original_signature = new_signature # self.new_signature = self._create_new_signature(self.original_signature) # # Convert the dict past_outputs={"answer": ...} to kwargs # # {past_answer=..., ...} # for key, value in past_outputs.items(): # past_key = f"past_{key}" # if past_key in self.new_signature.input_fields: # kwargs[past_key] = value # # Tell the wrapped module to use the new signature. # # Note: This only works if the wrapped module is a Predict or ChainOfThought. 
# kwargs["new_signature"] = self.new_signature # return self.original_forward(**kwargs) # def __call__(self, **kwargs): # copy.deepcopy(kwargs) # kwargs["_trace"] = False # kwargs.setdefault("demos", self.demos if self.demos is not None else []) # # perform backtracking # if dspy.settings.backtrack_to == self: # for key, value in dspy.settings.backtrack_to_args.items(): # kwargs.setdefault(key, value) # pred = self.forward(**kwargs) # else: # pred = self.module(**kwargs) # # now pop multiple reserved keys # # NOTE(shangyin) past_outputs seems not useful to include in demos, # # therefore dropped # for key in ["_trace", "demos", "signature", "new_signature", "config", "lm", "past_outputs"]: # kwargs.pop(key, None) # if dspy.settings.trace is not None: # trace = dspy.settings.trace # trace.append((self, {**kwargs}, pred)) # return pred ``` -------------------------------------------------------------------------------- /tests/primitives/test_example.py: -------------------------------------------------------------------------------- ```python import pytest import dspy from dspy import Example def test_example_initialization(): example = Example(a=1, b=2) assert example.a == 1 assert example.b == 2 def test_example_initialization_from_base(): base = Example(a=1, b=2) example = Example(base=base, c=3) assert example.a == 1 assert example.b == 2 assert example.c == 3 def test_example_initialization_from_dict(): base_dict = {"a": 1, "b": 2} example = Example(base=base_dict, c=3) assert example.a == 1 assert example.b == 2 assert example.c == 3 def test_example_set_get_item(): example = Example() example["a"] = 1 assert example["a"] == 1 def test_example_attribute_access(): example = Example(a=1) assert example.a == 1 example.a = 2 assert example.a == 2 def test_example_deletion(): example = Example(a=1, b=2) del example["a"] with pytest.raises(AttributeError): _ = example.a def test_example_len(): example = Example(a=1, b=2, dspy_hidden=3) assert len(example) == 2 def test_example_repr_str_img(): example = Example( img=dspy.Image(url="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7") ) assert ( repr(example) == "Example({'img': Image(url=data:image/gif;base64,<IMAGE_BASE_64_ENCODED(56)>)}) (input_keys=None)" ) assert ( str(example) == "Example({'img': Image(url=data:image/gif;base64,<IMAGE_BASE_64_ENCODED(56)>)}) (input_keys=None)" ) def test_example_repr_str(): example = Example(a=1) assert repr(example) == "Example({'a': 1}) (input_keys=None)" assert str(example) == "Example({'a': 1}) (input_keys=None)" def test_example_eq(): example1 = Example(a=1, b=2) example2 = Example(a=1, b=2) assert example1 == example2 assert example1 != "" def test_example_hash(): example1 = Example(a=1, b=2) example2 = Example(a=1, b=2) assert hash(example1) == hash(example2) def test_example_keys_values_items(): example = Example(a=1, b=2, dspy_hidden=3) assert set(example.keys()) == {"a", "b"} assert 1 in example.values() assert ("b", 2) in example.items() def test_example_get(): example = Example(a=1, b=2) assert example.get("a") == 1 assert example.get("c", "default") == "default" def test_example_with_inputs(): example = Example(a=1, b=2).with_inputs("a") assert example._input_keys == {"a"} def test_example_inputs_labels(): example = Example(a=1, b=2).with_inputs("a") inputs = example.inputs() assert inputs.toDict() == {"a": 1} labels = example.labels() assert labels.toDict() == {"b": 2} def test_example_copy_without(): example = Example(a=1, b=2) copied = example.copy(c=3) assert 
copied.a == 1 assert copied.c == 3 without_a = copied.without("a") with pytest.raises(AttributeError): _ = without_a.a def test_example_to_dict(): example = Example(a=1, b=2) assert example.toDict() == {"a": 1, "b": 2} ``` -------------------------------------------------------------------------------- /docs/docs/tutorials/build_ai_program/index.md: -------------------------------------------------------------------------------- ```markdown # Build AI Programs with DSPy This section contains hands-on tutorials that guide you through building production-ready AI applications using DSPy. Each tutorial demonstrates practical use cases and shows you how to leverage DSPy's modular programming approach to create robust, maintainable AI systems. ## Core Applications ### [Managing Conversation History](../conversation_history/index.md) Learn how to manage conversation history in DSPy applications. ### [Building AI Agents with DSPy](../customer_service_agent/index.ipynb) Learn to create intelligent agents that can handle complex customer service scenarios. This tutorial shows how to build agents that can understand context, maintain conversation state, and provide helpful responses. ### [Building AI Applications by Customizing DSPy Modules](../custom_module/index.ipynb) Discover how to create custom DSPy modules tailored to your specific needs. Learn the patterns for building reusable, composable components that can be shared across different applications. ## Retrieval-Augmented Generation (RAG) ### [Retrieval-Augmented Generation (RAG)](../rag/index.ipynb) Master the fundamentals of RAG systems with DSPy. Learn how to combine retrieval mechanisms with language models to build systems that can answer questions using external knowledge sources. ### [Building RAG as Agent](../agents/index.ipynb) Take RAG to the next level by building `ReAct` agent-based systems that can reason about when and how to retrieve information, making your RAG systems more intelligent and adaptive. ### [Multi-Hop RAG](../multihop_search/index.ipynb) Build sophisticated RAG systems that can perform multi-step reasoning across multiple information sources, perfect for complex research and analysis tasks. ## Specialized Use Cases ### [Entity Extraction](../entity_extraction/index.ipynb) Learn to build systems that can identify and extract specific entities from text, essential for information processing and data analysis applications. ### [Classification](../classification/index.md) Build robust text classification systems using DSPy's modular approach with a topic classification example. ### [Privacy-Conscious Delegation](../papillon/index.md) Explore advanced techniques for building AI systems that respect privacy constraints while maintaining high performance by combining a small local model and an advanced external model. ## Advanced Reasoning ### [Program Of Thought](../program_of_thought/index.ipynb) Learn to build systems that can generate and execute code to solve complex problems, combining the power of language models with programmatic reasoning. ## Multimodal Applications ### [Image Generation Prompt iteration](../image_generation_prompting/index.ipynb) Discover how to use DSPy to iteratively improve image generation prompts, creating better visual content through systematic optimization. ### [Audio](../audio/index.ipynb) Explore audio processing applications with DSPy, learning to build systems that can understand, process, and generate audio content. 
``` -------------------------------------------------------------------------------- /tests/predict/test_retry.py: -------------------------------------------------------------------------------- ```python # import functools # import pydantic # import dspy # from dspy.primitives.assertions import assert_transform_module, backtrack_handler # from dspy.utils import DummyLM # def test_retry_simple(): # predict = dspy.Predict("question -> answer") # retry_module = dspy.Retry(predict) # # Test Retry has created the correct new signature # for field in predict.signature.output_fields: # assert f"past_{field}" in retry_module.new_signature.input_fields # assert "feedback" in retry_module.new_signature.input_fields # lm = DummyLM([{"answer": "blue"}]) # dspy.settings.configure(lm=lm) # result = retry_module.forward( # question="What color is the sky?", # past_outputs={"answer": "red"}, # feedback="Try harder", # ) # assert result.answer == "blue" # def test_retry_forward_with_feedback(): # # First we make a mistake, then we fix it # lm = DummyLM([{"answer": "red"}, {"answer": "blue"}]) # dspy.settings.configure(lm=lm, trace=[]) # class SimpleModule(dspy.Module): # def __init__(self): # super().__init__() # self.predictor = dspy.Predict("question -> answer") # def forward(self, **kwargs): # result = self.predictor(**kwargs) # print(f"SimpleModule got {result.answer=}") # dspy.Suggest(result.answer == "blue", "Please think harder") # return result # program = SimpleModule() # program = assert_transform_module( # program.map_named_predictors(dspy.Retry), # functools.partial(backtrack_handler, max_backtracks=1), # ) # result = program(question="What color is the sky?") # assert result.answer == "blue" # # def test_retry_forward_with_typed_predictor(): # # # First we make a mistake, then we fix it # # lm = DummyLM([{"output": '{"answer":"red"}'}, {"output": '{"answer":"blue"}'}]) # # dspy.settings.configure(lm=lm, trace=[]) # # class AnswerQuestion(dspy.Signature): # # """Answer questions with succinct responses.""" # # class Input(pydantic.BaseModel): # # question: str # # class Output(pydantic.BaseModel): # # answer: str # # input: Input = dspy.InputField() # # output: Output = dspy.OutputField() # # class QuestionAnswerer(dspy.Module): # # def __init__(self): # # super().__init__() # # self.answer_question = dspy.TypedPredictor(AnswerQuestion) # # def forward(self, **kwargs): # # result = self.answer_question(input=AnswerQuestion.Input(**kwargs)).output # # dspy.Suggest(result.answer == "blue", "Please think harder") # # return result # # program = QuestionAnswerer() # # program = assert_transform_module( # # program.map_named_predictors(dspy.Retry), # # functools.partial(backtrack_handler, max_backtracks=1), # # ) # # result = program(question="What color is the sky?") # # assert result.answer == "blue" ``` -------------------------------------------------------------------------------- /tests/utils/test_annotation.py: -------------------------------------------------------------------------------- ```python from dspy.utils.annotation import experimental def test_experimental_decorator_on_function(): @experimental def test_function(): """A test function.""" return "test" assert "Experimental: This function may change or be removed in a future release without warning." in test_function.__doc__ assert "A test function." 
in test_function.__doc__ assert test_function() == "test" def test_experimental_decorator_on_function_with_version(): @experimental(version="3.1.0") def test_function(): """A test function with version.""" return "versioned" assert "introduced in v3.1.0" in test_function.__doc__ assert "Experimental: This function may change or be removed in a future release without warning (introduced in v3.1.0)." in test_function.__doc__ assert "A test function with version." in test_function.__doc__ assert test_function() == "versioned" def test_experimental_decorator_on_class(): @experimental class TestClass: """A test class.""" def method(self): return "method" assert "Experimental: This class may change or be removed in a future release without warning." in TestClass.__doc__ assert "A test class." in TestClass.__doc__ instance = TestClass() assert instance.method() == "method" def test_experimental_decorator_on_class_with_version(): @experimental(version="2.5.0") class TestClass: """A test class with version.""" pass assert "introduced in v2.5.0" in TestClass.__doc__ assert "Experimental: This class may change or be removed in a future release without warning (introduced in v2.5.0)." in TestClass.__doc__ assert "A test class with version." in TestClass.__doc__ def test_experimental_decorator_without_docstring(): @experimental def test_function(): return "no_doc" assert test_function.__doc__ == "Experimental: This function may change or be removed in a future release without warning." assert test_function() == "no_doc" def test_experimental_decorator_without_docstring_with_version(): @experimental(version="1.0.0") def test_function(): return "no_doc_version" assert test_function.__doc__ == "Experimental: This function may change or be removed in a future release without warning (introduced in v1.0.0)." assert test_function() == "no_doc_version" def test_experimental_decorator_with_callable_syntax(): def test_function(): """A test function.""" return "callable" decorated = experimental(test_function) assert "Experimental:" in decorated.__doc__ assert "A test function." in decorated.__doc__ assert decorated() == "callable" def test_experimental_decorator_with_version_callable_syntax(): def test_function(): """A test function.""" return "callable_version" decorated = experimental(test_function, version="4.0.0") assert "introduced in v4.0.0" in decorated.__doc__ assert "Experimental:" in decorated.__doc__ assert decorated() == "callable_version" ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_nesting_1/schema.json: -------------------------------------------------------------------------------- ```json { "description": "The AI program is designed to process hierarchical data structures with multiple levels of nesting. The program will take a deeply nested input structure representing a complex dataset, perform specific transformations, validations, and computations, and then produce an equally complex nested output structure. 
The program is suitable for applications that require detailed data processing, such as multi-level data aggregation, hierarchical data validation, and nested data transformation.", "properties": { "level1": { "properties": { "level2": { "properties": { "level3": { "properties": { "level4": { "properties": { "level5": { "properties": { "field1": { "description": "A string field at the deepest level", "type": "string" }, "field2": { "description": "A numerical field at the deepest level", "type": "number" } }, "required": ["field1", "field2"], "type": "object" } }, "required": ["level5"], "type": "object" } }, "required": ["level4"], "type": "object" } }, "required": ["level3"], "type": "object" } }, "required": ["level2"], "type": "object" }, "resultLevel1": { "properties": { "resultLevel2": { "properties": { "resultLevel3": { "properties": { "resultLevel4": { "properties": { "resultLevel5": { "properties": { "outputField1": { "description": "A boolean field indicating success or failure", "type": "boolean" }, "outputField2": { "description": "An array of strings representing messages", "items": { "type": "string" }, "type": "array" } }, "required": ["outputField1", "outputField2"], "type": "object" } }, "required": ["resultLevel5"], "type": "object" } }, "required": ["resultLevel4"], "type": "object" } }, "required": ["resultLevel3"], "type": "object" } }, "required": ["resultLevel2"], "type": "object" } }, "required": ["level1", "resultLevel1"], "type": "object" } ``` -------------------------------------------------------------------------------- /dspy/predict/parallel.py: -------------------------------------------------------------------------------- ```python import threading from typing import Any from dspy.dsp.utils.settings import settings from dspy.primitives.example import Example from dspy.utils.parallelizer import ParallelExecutor class Parallel: def __init__( self, num_threads: int | None = None, max_errors: int | None = None, access_examples: bool = True, return_failed_examples: bool = False, provide_traceback: bool | None = None, disable_progress_bar: bool = False, ): super().__init__() self.num_threads = num_threads or settings.num_threads self.max_errors = settings.max_errors if max_errors is None else max_errors self.access_examples = access_examples self.return_failed_examples = return_failed_examples self.provide_traceback = provide_traceback self.disable_progress_bar = disable_progress_bar self.error_count = 0 self.error_lock = threading.Lock() self.cancel_jobs = threading.Event() self.failed_examples = [] self.exceptions = [] def forward(self, exec_pairs: list[tuple[Any, Example]], num_threads: int | None = None) -> list[Any]: num_threads = num_threads if num_threads is not None else self.num_threads executor = ParallelExecutor( num_threads=num_threads, max_errors=self.max_errors, provide_traceback=self.provide_traceback, disable_progress_bar=self.disable_progress_bar, ) def process_pair(pair): result = None module, example = pair if isinstance(example, Example): if self.access_examples: result = module(**example.inputs()) else: result = module(example) elif isinstance(example, dict): result = module(**example) elif isinstance(example, list) and module.__class__.__name__ == "Parallel": result = module(example) elif isinstance(example, tuple): result = module(*example) else: raise ValueError( f"Invalid example type: {type(example)}, only supported types are Example, dict, list and tuple" ) return result # Execute the processing function over the execution pairs 
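        # ParallelExecutor runs process_pair over exec_pairs with up to num_threads workers;
        # failed pairs are reported through executor.failed_indices and executor.exceptions_map,
        # which are consumed below when return_failed_examples is True.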
results = executor.execute(process_pair, exec_pairs) # Populate failed examples and exceptions from the executor if self.return_failed_examples: for failed_idx in executor.failed_indices: if failed_idx < len(exec_pairs): _, original_example = exec_pairs[failed_idx] self.failed_examples.append(original_example) if exception := executor.exceptions_map.get(failed_idx): self.exceptions.append(exception) return results, self.failed_examples, self.exceptions else: return results def __call__(self, *args: Any, **kwargs: Any) -> Any: return self.forward(*args, **kwargs) ``` -------------------------------------------------------------------------------- /tests/reliability/complex_types/generated/test_nesting_2/program.py: -------------------------------------------------------------------------------- ```python ### Input models ### from datetime import datetime from pydantic import BaseModel, Field class Details(BaseModel): value: str = Field(..., description="Customer's value category") age: int = Field(..., description="Customer's age") class Customer(BaseModel): customer_id: str = Field(..., description="Unique identifier for the customer") customer_type: bool = Field(..., description="Indicates if the customer is a premium member") details: Details class Details1(BaseModel): value: float = Field(..., description="Monetary value of the transaction") timestamp: datetime = Field(..., description="Timestamp of the transaction") class Transaction(BaseModel): transaction_id: str = Field(..., description="Unique identifier for the transaction") amount: float = Field(..., description="Transaction amount") details: Details1 class ProgramInputs(BaseModel): customer: Customer transaction: Transaction ### Output models ### from datetime import datetime from pydantic import BaseModel, Field class CustomerType(BaseModel): is_premium: bool = Field(..., description="Indicates if the customer is a premium member") category: str = Field(..., description="Customer's membership category") class CustomerSummary(BaseModel): customer_id: str = Field(..., description="Unique identifier for the customer") customer_type: CustomerType value: str = Field(..., description="Customer's value category") class Details(BaseModel): value: float = Field(..., description="Monetary value of the transaction") timestamp: datetime = Field(..., description="Timestamp of the transaction") class TransactionSummary(BaseModel): transaction_id: str = Field(..., description="Unique identifier for the transaction") total_amount: float = Field(..., description="Total transaction amount") details: Details class ProgramOutputs(BaseModel): customer_summary: CustomerSummary transaction_summary: TransactionSummary ### Program definition ### import dspy class BaseSignature(dspy.Signature): """ This AI program is designed to process complex datasets with multiple nested input fields and produce structured output fields. It can handle cases where nested fields have the same name but different types, ensuring that the data is accurately processed and transformed. The program is particularly useful for applications that require detailed data analysis, integration of multiple data sources, and handling of heterogeneous data types. 
""" program_signature = BaseSignature for input_field_name, input_field in ProgramInputs.model_fields.items(): program_signature = program_signature.append( name=input_field_name, field=dspy.InputField(description=input_field.description), type_=input_field.annotation, ) for output_field_name, output_field in ProgramOutputs.model_fields.items(): program_signature = program_signature.append( name=output_field_name, field=dspy.OutputField(description=input_field.description), type_=output_field.annotation, ) program = dspy.ChainOfThought(program_signature) ``` -------------------------------------------------------------------------------- /dspy/teleprompt/teleprompt_optuna.py: -------------------------------------------------------------------------------- ```python from dspy.evaluate.evaluate import Evaluate from dspy.teleprompt.teleprompt import Teleprompter from .bootstrap import BootstrapFewShot class BootstrapFewShotWithOptuna(Teleprompter): def __init__( self, metric, teacher_settings=None, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, num_candidate_programs=16, num_threads=None, ): self.metric = metric self.teacher_settings = teacher_settings or {} self.max_rounds = max_rounds self.num_threads = num_threads self.min_num_samples = 1 self.max_num_samples = max_bootstrapped_demos self.num_candidate_sets = num_candidate_programs # self.max_num_traces = 1 + int(max_bootstrapped_demos / 2.0 * self.num_candidate_sets) # Semi-hacky way to get the parent class's _bootstrap function to stop early. # self.max_bootstrapped_demos = self.max_num_traces self.max_labeled_demos = max_labeled_demos print("Going to sample between", self.min_num_samples, "and", self.max_num_samples, "traces per predictor.") # print("Going to sample", self.max_num_traces, "traces in total.") print("Will attempt to train", self.num_candidate_sets, "candidate sets.") def objective(self, trial): program2 = self.student.reset_copy() for (name, compiled_predictor), (_, program2_predictor) in zip( self.compiled_teleprompter.named_predictors(), program2.named_predictors(), strict=False, ): all_demos = compiled_predictor.demos demo_index = trial.suggest_int(f"demo_index_for_{name}", 0, len(all_demos) - 1) selected_demo = dict(all_demos[demo_index]) program2_predictor.demos = [selected_demo] evaluate = Evaluate( devset=self.valset, metric=self.metric, num_threads=self.num_threads, display_table=False, display_progress=True, ) result = evaluate(program2) trial.set_user_attr("program", program2) return result.score def compile(self, student, *, teacher=None, max_demos, trainset, valset=None): import optuna self.trainset = trainset self.valset = valset or trainset self.student = student.reset_copy() self.teacher = teacher.deepcopy() if teacher is not None else student.reset_copy() teleprompter_optimize = BootstrapFewShot( metric=self.metric, max_bootstrapped_demos=max_demos, max_labeled_demos=self.max_labeled_demos, teacher_settings=self.teacher_settings, max_rounds=self.max_rounds, ) self.compiled_teleprompter = teleprompter_optimize.compile( self.student, teacher=self.teacher, trainset=self.trainset, ) study = optuna.create_study(direction="maximize") study.optimize(self.objective, n_trials=self.num_candidate_sets) best_program = study.trials[study.best_trial.number].user_attrs["program"] print("Best score:", study.best_value) print("Best program:", best_program) return best_program ``` -------------------------------------------------------------------------------- 
/tests/reliability/complex_types/generated/test_many_types_1/inputs/input2.json: -------------------------------------------------------------------------------- ```json { "assertions": [ "The 'processedTupleField' should be an tuple with exactly two elements: the first element being a string and the second element being a number.", "The 'processedEnumField' should be one of the predefined options: 'option1', 'option2', or 'option3'.", "The 'processedDatetimeField' should be a date-time", "The 'processedLiteralField' should be the enum 'literalValue'.", "The 'processedObjectField' should be an object containing 'subField1' as a string, 'subField2' as a number, and an 'additionalField' as a boolean.", "The 'processedNestedObjectField' should be an object containing 'tupleField' as a tuple with a string and float, 'enumField' as one of the predefined options (option1, option2, or option3), 'datetimeField' as a 'date-time' object, 'literalField' as the string 'literalValue', and an 'additionalField' as a boolean." ], "input": { "datetimeField": "2023-10-01T12:00:00Z", "enumField": "option1", "literalField": "literalValue", "nestedObjectField": { "datetimeField": "2023-11-01T12:00:00Z", "enumField": "option2", "literalField": "literalValue", "tupleField": ["nestedString", 789] }, "objectField": { "subField1": "Patriotism is a feeling of love, devotion, and sense of attachment to one's country. This attachment can be a combination of many different feelings relating to one's homeland, including ethnic, cultural, political or historical aspects. It encompasses a set of concepts closely related to those of nationalism. In the context of patriotism, people may express their feelings in a variety of ways, including supporting their country's interests and policies, celebrating national holidays, and participating in civic activities. Patriotism often involves a sense of pride in one's country and a willingness to defend it against any threats. It can also include a commitment to improving the country and making it a better place for future generations. The concept of patriotism is often linked with the idea of national identity, which is the sense of a nation as a cohesive whole, as represented by distinctive traditions, culture, language, and politics. Patriots may feel a strong sense of loyalty and duty to their country, and they may take actions to support and protect it. However, it is important to note that patriotism can also be a complex and sometimes controversial concept. While it can inspire positive actions and a sense of community, it can also lead to exclusionary or aggressive behaviors if taken to an extreme. In some cases, excessive patriotism can result in nationalism, which can lead to conflicts with other nations or groups. Despite these potential issues, many people view patriotism as a positive force that can unite people and inspire them to work together for the common good. It can foster a sense of belonging and purpose, and it can motivate individuals to contribute to the well-being of their country. 
Overall, patriotism is a multifaceted and deeply personal sentiment that can manifest in many different ways, depending on an individual's experiences, beliefs, and values.", "subField2": 456 }, "tupleField": ["exampleString", 123] } } ``` -------------------------------------------------------------------------------- /tests/test_utils/server/litellm_server.py: -------------------------------------------------------------------------------- ```python import json import os from typing import AsyncIterator, Iterator import litellm from litellm import CustomLLM from litellm.types.utils import GenericStreamingChunk LITELLM_TEST_SERVER_LOG_FILE_PATH_ENV_VAR = "LITELLM_TEST_SERVER_LOG_FILE_PATH" class DSPyTestModel(CustomLLM): def completion(self, *args, **kwargs) -> litellm.ModelResponse: _append_request_to_log_file(kwargs) return _get_mock_llm_response(kwargs) async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse: _append_request_to_log_file(kwargs) return _get_mock_llm_response(kwargs) def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]: generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", "index": 0, "is_finished": True, "text": '{"output_text": "Hello!"}', "tool_use": None, "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}, } return generic_streaming_chunk # type: ignore async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]: generic_streaming_chunk: GenericStreamingChunk = { "finish_reason": "stop", "index": 0, "is_finished": True, "text": '{"output_text": "Hello!"}', "tool_use": None, "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}, } yield generic_streaming_chunk def _get_mock_llm_response(request_kwargs): _throw_exception_based_on_content_if_applicable(request_kwargs) return litellm.completion( model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}], usage={"prompt_tokens": 10, "completion_tokens": 10, "total_tokens": 20}, mock_response="Hi!", ) def _throw_exception_based_on_content_if_applicable(request_kwargs): """ Throws an exception, for testing purposes, based on the content of the request message. """ model = request_kwargs["model"] content = request_kwargs["messages"][0]["content"] if "429" in content: raise litellm.RateLimitError(message="Rate limit exceeded", llm_provider=None, model=model) elif "504" in content: raise litellm.Timeout("Request timed out!", llm_provider=None, model=model) elif "400" in content: raise litellm.BadRequestError(message="Bad request", llm_provider=None, model=model) elif "401" in content: raise litellm.AuthenticationError(message="Authentication error", llm_provider=None, model=model) def _append_request_to_log_file(completion_kwargs): log_file_path = os.environ.get(LITELLM_TEST_SERVER_LOG_FILE_PATH_ENV_VAR) if log_file_path is None: raise ValueError( "Server logs file path is not defined! Please set the path using the" + f" {LITELLM_TEST_SERVER_LOG_FILE_PATH_ENV_VAR} environment variable." 
) with open(log_file_path, "a") as f: log_blob = ( { "model": completion_kwargs["model"], "messages": completion_kwargs["messages"], }, ) json.dump(log_blob, f) f.write("\n") dspy_test_model = DSPyTestModel() ``` -------------------------------------------------------------------------------- /docs/docs/api/modules/CodeAct.md: -------------------------------------------------------------------------------- ```markdown # dspy.CodeAct <!-- START_API_REF --> ::: dspy.CodeAct handler: python options: members: - __call__ - batch - deepcopy - dump_state - get_lm - inspect_history - load - load_state - map_named_predictors - named_parameters - named_predictors - named_sub_modules - parameters - predictors - reset_copy - save - set_lm show_source: true show_root_heading: true heading_level: 2 docstring_style: google show_root_full_path: true show_object_full_path: false separate_signature: false inherited_members: true <!-- END_API_REF --> # CodeAct CodeAct is a DSPy module that combines code generation with tool execution to solve problems. It generates Python code snippets that use provided tools and the Python standard library to accomplish tasks. ## Basic Usage Here's a simple example of using CodeAct: ```python import dspy from dspy.predict import CodeAct # Define a simple tool function def factorial(n: int) -> int: """Calculate the factorial of a number.""" if n == 1: return 1 return n * factorial(n-1) # Create a CodeAct instance act = CodeAct("n->factorial_result", tools=[factorial]) # Use the CodeAct instance result = act(n=5) print(result) # Will calculate factorial(5) = 120 ``` ## How It Works CodeAct operates in an iterative manner: 1. Takes input parameters and available tools 2. Generates Python code snippets that use these tools 3. Executes the code using a Python sandbox 4. Collects the output and determines if the task is complete 5. Answer the original question based on the collected information ## ⚠️ Limitations ### Only accepts pure functions as tools (no callable objects) The following example does not work due to the usage of a callable object. ```python # ❌ NG class Add(): def __call__(self, a: int, b: int): return a + b dspy.CodeAct("question -> answer", tools=[Add()]) ``` ### External libraries cannot be used The following example does not work due to the usage of the external library `numpy`. ```python # ❌ NG import numpy as np def exp(i: int): return np.exp(i) dspy.CodeAct("question -> answer", tools=[exp]) ``` ### All dependent functions need to be passed to `CodeAct` Functions that depend on other functions or classes not passed to `CodeAct` cannot be used. The following example does not work because the tool functions depend on other functions or classes that are not passed to `CodeAct`, such as `Profile` or `secret_function`. 
```python # ❌ NG from pydantic import BaseModel class Profile(BaseModel): name: str age: int def age(profile: Profile): return def parent_function(): print("Hi!") def child_function(): parent_function() dspy.CodeAct("question -> answer", tools=[age, child_function]) ``` Instead, the following example works since all necessary tool functions are passed to `CodeAct`: ```python # ✅ OK def parent_function(): print("Hi!") def child_function(): parent_function() dspy.CodeAct("question -> answer", tools=[parent_function, child_function]) ``` ``` -------------------------------------------------------------------------------- /dspy/datasets/hotpotqa.py: -------------------------------------------------------------------------------- ```python import random from dspy.datasets.dataset import Dataset class HotPotQA(Dataset): def __init__( self, *args, only_hard_examples=True, keep_details="dev_titles", unofficial_dev=True, **kwargs, ) -> None: super().__init__(*args, **kwargs) assert only_hard_examples, ( "Care must be taken when adding support for easy examples." "Dev must be all hard to match official dev, but training can be flexible." ) from datasets import load_dataset hf_official_train = load_dataset("hotpot_qa", "fullwiki", split="train") hf_official_dev = load_dataset("hotpot_qa", "fullwiki", split="validation") official_train = [] for raw_example in hf_official_train: if raw_example["level"] == "hard": if keep_details is True: keys = ["id", "question", "answer", "type", "supporting_facts", "context"] elif keep_details == "dev_titles": keys = ["question", "answer", "supporting_facts"] else: keys = ["question", "answer"] example = {k: raw_example[k] for k in keys} if "supporting_facts" in example: example["gold_titles"] = set(example["supporting_facts"]["title"]) del example["supporting_facts"] official_train.append(example) rng = random.Random(0) rng.shuffle(official_train) self._train = official_train[: len(official_train) * 75 // 100] if unofficial_dev: self._dev = official_train[len(official_train) * 75 // 100 :] else: self._dev = None for example in self._train: if keep_details == "dev_titles": del example["gold_titles"] test = [] for raw_example in hf_official_dev: assert raw_example["level"] == "hard" example = {k: raw_example[k] for k in ["id", "question", "answer", "type", "supporting_facts"]} if "supporting_facts" in example: example["gold_titles"] = set(example["supporting_facts"]["title"]) del example["supporting_facts"] test.append(example) self._test = test if __name__ == "__main__": from dspy.dsp.utils import dotdict data_args = dotdict(train_seed=1, train_size=16, eval_seed=2023, dev_size=200 * 5, test_size=0) dataset = HotPotQA(**data_args) print(dataset) print(dataset.train[0].question) print(dataset.train[15].question) print(len(dataset.train), len(dataset.dev), len(dataset.test)) print(dataset.dev[0].question) print(dataset.dev[340].question) print(dataset.dev[937].question) """ What was the population of the city where Woodward Avenue ends in 2010? Where did the star , who is also an executive producer, of the Mick begin her carrer? 16 1000 0 Both London and German have seen attacks during war, there was one specific type of attack that Germany called the blitz, what did London call a similar attack? Pre-Madonna was a collection of demos by the singer who was a leading presence during the emergence of what network? Alan Mills composed the classic folk song that tells the story of what? 
""" ``` -------------------------------------------------------------------------------- /dspy/predict/best_of_n.py: -------------------------------------------------------------------------------- ```python from typing import Callable import dspy from dspy.predict.predict import Module, Prediction class BestOfN(Module): def __init__( self, module: Module, N: int, # noqa: N803 reward_fn: Callable[[dict, Prediction], float], threshold: float, fail_count: int | None = None, ): """ Runs a module up to `N` times with different rollout IDs at `temperature=1.0` and returns the best prediction out of `N` attempts or the first prediction that passes the `threshold`. Args: module (Module): The module to run. N (int): The number of times to run the module. reward_fn (Callable[[dict, Prediction], float]): The reward function which takes in the args passed to the module, the resulting prediction, and returns a scalar reward. threshold (float): The threshold for the reward function. fail_count (Optional[int], optional): The number of times the module can fail before raising an error. Defaults to N if not provided. Example: ```python import dspy dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) # Define a QA module with chain of thought qa = dspy.ChainOfThought("question -> answer") # Define a reward function that checks for one-word answers def one_word_answer(args, pred): return 1.0 if len(pred.answer.split()) == 1 else 0.0 # Create a refined module that tries up to 3 times best_of_3 = dspy.BestOfN(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0) # Use the refined module result = best_of_3(question="What is the capital of Belgium?").answer # Returns: Brussels ``` """ self.module = module self.reward_fn = lambda *args: reward_fn(*args) # to prevent this from becoming a parameter self.threshold = threshold self.N = N self.fail_count = fail_count or N # default to N if fail_count is not provided def forward(self, **kwargs): lm = self.module.get_lm() or dspy.settings.lm start = lm.kwargs.get("rollout_id", 0) rollout_ids = [start + i for i in range(self.N)] best_pred, best_trace, best_reward = None, None, -float("inf") for idx, rid in enumerate(rollout_ids): lm_ = lm.copy(rollout_id=rid, temperature=1.0) mod = self.module.deepcopy() mod.set_lm(lm_) try: with dspy.context(trace=[]): pred = mod(**kwargs) trace = dspy.settings.trace.copy() # NOTE: Not including the trace of reward_fn. 
reward = self.reward_fn(kwargs, pred) if reward > best_reward: best_reward, best_pred, best_trace = reward, pred, trace if reward >= self.threshold: break except Exception as e: print(f"BestOfN: Attempt {idx + 1} failed with rollout id {rid}: {e}") if idx > self.fail_count: raise e self.fail_count -= 1 if best_trace: dspy.settings.trace.extend(best_trace) return best_pred ``` -------------------------------------------------------------------------------- /dspy/clients/provider.py: -------------------------------------------------------------------------------- ```python from abc import abstractmethod from concurrent.futures import Future from threading import Thread from typing import TYPE_CHECKING, Any from dspy.clients.utils_finetune import MultiGPUConfig, TrainDataFormat if TYPE_CHECKING: from dspy.clients.lm import LM class TrainingJob(Future): def __init__( self, thread: Thread | None = None, model: str | None = None, train_data: list[dict[str, Any]] | None = None, train_data_format: TrainDataFormat | None = None, train_kwargs: dict[str, Any] | None = None, ): self.thread = thread self.model = model self.train_data = train_data self.train_data_format = train_data_format self.train_kwargs = train_kwargs or {} super().__init__() # Subclasses should override the cancel method to cancel the job; then call # the super's cancel method so that the future can be cancelled. def cancel(self): super().cancel() @abstractmethod def status(self): raise NotImplementedError class ReinforceJob: def __init__(self, lm: "LM", train_kwargs: dict[str, Any] | None = None, gpu_config: MultiGPUConfig = MultiGPUConfig(num_inference_gpus=1, num_training_gpus=1)): self.lm = lm self.train_kwargs = train_kwargs or {} self.gpu_config = gpu_config self.checkpoints = {} self.last_checkpoint = None self.gpu_config = gpu_config @abstractmethod def initialize(self): raise NotImplementedError @abstractmethod def step(self, train_data: list[dict[str, Any]], train_data_format: TrainDataFormat | str | None = None): raise NotImplementedError @abstractmethod def terminate(self): raise NotImplementedError @abstractmethod def update_model(self): raise NotImplementedError @abstractmethod def save_checkpoint(self, checkpoint_name: str): raise NotImplementedError def cancel(self): raise NotImplementedError def status(self): raise NotImplementedError class Provider: def __init__(self): self.finetunable = False self.reinforceable = False self.TrainingJob = TrainingJob self.ReinforceJob = ReinforceJob @staticmethod def is_provider_model(model: str) -> bool: # Subclasses should actually check whether a model is supported if they # want to have the model provider auto-discovered. return False @staticmethod def launch(lm: "LM", launch_kwargs: dict[str, Any] | None = None): # Note that "launch" and "kill" methods might be called even if there # is a launched LM or no launched LM to kill. These methods should be # resillient to such cases. pass @staticmethod def kill(lm: "LM", launch_kwargs: dict[str, Any] | None = None): # We assume that LM.launch_kwargs dictionary will contain the necessary # information for a provider to launch and/or kill an LM. This is the # reeason why the argument here is named launch_kwargs and not # kill_kwargs. 
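        # A concrete provider (for example, a hypothetical self-hosted serving backend) would
        # read whatever it needs from launch_kwargs here to shut the model down; this base
        # implementation is intentionally a no-op.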
pass @staticmethod def finetune( job: TrainingJob, model: str, train_data: list[dict[str, Any]], train_data_format: TrainDataFormat | str | None, train_kwargs: dict[str, Any] | None = None, ) -> str: raise NotImplementedError ``` -------------------------------------------------------------------------------- /dspy/datasets/colors.py: -------------------------------------------------------------------------------- ```python import random from dspy.datasets.dataset import Dataset ### A bunch of colors, originally from matplotlib all_colors = [ "alice blue", "dodger blue", "light sky blue", "deep sky blue", "sky blue", "steel blue", "light steel blue", "medium blue", "navy blue", "blue", "royal blue", "cadet blue", "cornflower blue", "medium slate blue", "slate blue", "dark slate blue", "powder blue", "turquoise", "dark turquoise", "medium turquoise", "pale turquoise", "light sea green", "medium sea green", "sea green", "forest green", "green yellow", "lime green", "dark green", "green", "lime", "chartreuse", "lawn green", "yellow green", "olive green", "dark olive green", "medium spring green", "spring green", "medium aquamarine", "aquamarine", "aqua", "cyan", "dark cyan", "teal", "medium orchid", "dark orchid", "orchid", "blue violet", "violet", "dark violet", "plum", "thistle", "magenta", "fuchsia", "dark magenta", "medium purple", "purple", "rebecca purple", "dark red", "fire brick", "indian red", "light coral", "dark salmon", "light salmon", "salmon", "red", "crimson", "tomato", "coral", "orange red", "dark orange", "orange", "yellow", "gold", "light goldenrod yellow", "pale goldenrod", "goldenrod", "dark goldenrod", "beige", "moccasin", "blanched almond", "navajo white", "antique white", "bisque", "burlywood", "dark khaki", "khaki", "tan", "wheat", "snow", "floral white", "old lace", "ivory", "linen", "seashell", "honeydew", "mint cream", "azure", "lavender", "ghost white", "white smoke", "gainsboro", "light gray", "silver", "dark gray", "gray", "dim gray", "slate gray", "light slate gray", "dark slate gray", "black", "medium violet red", "pale violet red", "deep pink", "hot pink", "light pink", "pink", "peach puff", "rosy brown", "saddle brown", "sandy brown", "chocolate", "peru", "sienna", "brown", "maroon", "white", "misty rose", "lavender blush", "papaya whip", "lemon chiffon", "light yellow", "corn silk", "pale green", "light green", "olive drab", "olive", "dark sea green", ] class Colors(Dataset): def __init__(self, sort_by_suffix=True, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.sort_by_suffix = sort_by_suffix colors = self.sorted_by_suffix(all_colors) train_size = int( len(colors) * 0.6 ) # chosen to ensure that similar colors aren't repeated between train and dev train_colors, dev_colors = colors[:train_size], colors[train_size:] self._train = [{"color": color} for color in train_colors] self._dev = [{"color": color} for color in dev_colors] random.Random(0).shuffle(self._train) random.Random(0).shuffle(self._dev) def sorted_by_suffix(self, colors): if not self.sort_by_suffix: return colors if isinstance(colors[0], str): sorted_colors = sorted(colors, key=lambda x: x[::-1]) else: sorted_colors = sorted(colors, key=lambda x: x["color"][::-1]) return sorted_colors ``` -------------------------------------------------------------------------------- /dspy/utils/unbatchify.py: -------------------------------------------------------------------------------- ```python import queue import threading import time from concurrent.futures import Future from typing 
import Any, Callable class Unbatchify: def __init__( self, batch_fn: Callable[[list[Any]], list[Any]], max_batch_size: int = 32, max_wait_time: float = 0.1 ): """ Initializes the Unbatchify. Args: batch_fn: The batch-processing function that accepts a list of inputs and returns a list of outputs. max_batch_size: The maximum number of items to include in a batch. max_wait_time: The maximum time (in seconds) to wait for batch to fill before processing. """ self.batch_fn = batch_fn self.max_batch_size = max_batch_size self.max_wait_time = max_wait_time self.input_queue = queue.Queue() self.stop_event = threading.Event() self.worker_thread = threading.Thread(target=self._worker) self.worker_thread.daemon = True # Ensures thread exits when main program exits self.worker_thread.start() def __call__(self, input_item: Any) -> Any: """ Thread-safe function that accepts a single input and returns the corresponding output. Args: input_item: The single input item to process. Returns: The output corresponding to the input_item after processing through batch_fn. """ future = Future() self.input_queue.put((input_item, future)) try: result = future.result() except Exception as e: raise e return result def _worker(self): """ Worker thread that batches inputs and processes them using batch_fn. """ while not self.stop_event.is_set(): batch = [] futures = [] start_time = time.time() while len(batch) < self.max_batch_size and (time.time() - start_time) < self.max_wait_time: try: input_item, future = self.input_queue.get(timeout=self.max_wait_time) batch.append(input_item) futures.append(future) except queue.Empty: break if batch: try: outputs = self.batch_fn(batch) for output, future in zip(outputs, futures, strict=False): future.set_result(output) except Exception as e: for future in futures: future.set_exception(e) else: time.sleep(0.01) # Clean up remaining items when stopping while True: try: _, future = self.input_queue.get_nowait() future.set_exception(RuntimeError("Unbatchify is closed")) except queue.Empty: break print("Worker thread has been terminated.") def close(self): """ Stops the worker thread and cleans up resources. """ if not self.stop_event.is_set(): self.stop_event.set() self.worker_thread.join() def __enter__(self): """ Enables use as a context manager. """ return self def __exit__(self, exc_type, exc_value, traceback): """ Ensures resources are cleaned up when exiting context. """ self.close() def __del__(self): """ Ensures the worker thread is terminated when the object is garbage collected. """ self.close() ``` -------------------------------------------------------------------------------- /dspy/signatures/field.py: -------------------------------------------------------------------------------- ```python import pydantic # The following arguments can be used in DSPy InputField and OutputField in addition # to the standard pydantic.Field arguments. We just hope pydanitc doesn't add these, # as it would give a name clash. 
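#
# For illustration only (a hypothetical user-side signature, not part of this
# module): these extra arguments are what callers pass when declaring fields,
# e.g.
#
#     class RateCitation(dspy.Signature):
#         citation: str = dspy.InputField(desc="the citation text to check")
#         score: int = dspy.OutputField(desc="faithfulness score", ge=0, le=10)
#
# The `desc` values end up in `json_schema_extra` via `move_kwargs` below, while
# standard pydantic constraints such as `ge`/`le` are surfaced to the LM as
# human-readable text through `PYDANTIC_CONSTRAINT_MAP`.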
DSPY_FIELD_ARG_NAMES = ["desc", "prefix", "format", "parser", "__dspy_field_type"] PYDANTIC_CONSTRAINT_MAP = { "gt": "greater than: ", "ge": "greater than or equal to: ", "lt": "less than: ", "le": "less than or equal to: ", "min_length": "minimum length: ", "max_length": "maximum length: ", "multiple_of": "a multiple of the given number: ", "allow_inf_nan": "allow 'inf', '-inf', 'nan' values: ", } def move_kwargs(**kwargs): # Pydantic doesn't allow arbitrary arguments to be given to fields, # but asks that # > any extra data you want to add to the JSON schema should be passed # > as a dictionary to the json_schema_extra keyword argument. # See: https://docs.pydantic.dev/2.6/migration/#changes-to-pydanticfield pydantic_kwargs = {} json_schema_extra = {} for k, v in kwargs.items(): if k in DSPY_FIELD_ARG_NAMES: json_schema_extra[k] = v else: pydantic_kwargs[k] = v # Also copy over the pydantic "description" if no dspy "desc" is given. if "description" in kwargs and "desc" not in json_schema_extra: json_schema_extra["desc"] = kwargs["description"] constraints = _translate_pydantic_field_constraints(**kwargs) if constraints: json_schema_extra["constraints"] = constraints pydantic_kwargs["json_schema_extra"] = json_schema_extra return pydantic_kwargs def _translate_pydantic_field_constraints(**kwargs): """Extracts Pydantic constraints and translates them into human-readable format.""" constraints = [] for key, value in kwargs.items(): if key in PYDANTIC_CONSTRAINT_MAP: constraints.append(f"{PYDANTIC_CONSTRAINT_MAP[key]}{value}") return ", ".join(constraints) def InputField(**kwargs): # noqa: N802 return pydantic.Field(**move_kwargs(**kwargs, __dspy_field_type="input")) def OutputField(**kwargs): # noqa: N802 return pydantic.Field(**move_kwargs(**kwargs, __dspy_field_type="output")) def new_to_old_field(field): return (OldInputField if field.json_schema_extra["__dspy_field_type"] == "input" else OldOutputField)( prefix=field.json_schema_extra["prefix"], desc=field.json_schema_extra["desc"], format=field.json_schema_extra.get("format"), ) class OldField: """A more ergonomic datatype that infers prefix and desc if omitted.""" def __init__(self, *, prefix=None, desc=None, input, format=None): self.prefix = prefix # This can be None initially and set later self.desc = desc self.format = format def finalize(self, key, inferred_prefix): """Set the prefix if it's not provided explicitly.""" if self.prefix is None: self.prefix = inferred_prefix + ":" if self.desc is None: self.desc = f"${{{key}}}" def __repr__(self): return f"{self.__class__.__name__}(prefix={self.prefix}, desc={self.desc})" def __eq__(self, __value: object) -> bool: return self.__dict__ == __value.__dict__ class OldInputField(OldField): def __init__(self, *, prefix=None, desc=None, format=None): super().__init__(prefix=prefix, desc=desc, input=True, format=format) class OldOutputField(OldField): def __init__(self, *, prefix=None, desc=None, format=None): super().__init__(prefix=prefix, desc=desc, input=False, format=format) ``` -------------------------------------------------------------------------------- /dspy/clients/__init__.py: -------------------------------------------------------------------------------- ```python import logging import os from pathlib import Path import litellm from dspy.clients.base_lm import BaseLM, inspect_history from dspy.clients.cache import Cache from dspy.clients.embedding import Embedder from dspy.clients.lm import LM from dspy.clients.provider import Provider, TrainingJob logger = 
logging.getLogger(__name__) DISK_CACHE_DIR = os.environ.get("DSPY_CACHEDIR") or os.path.join(Path.home(), ".dspy_cache") DISK_CACHE_LIMIT = int(os.environ.get("DSPY_CACHE_LIMIT", 3e10)) # 30 GB default def configure_cache( enable_disk_cache: bool | None = True, enable_memory_cache: bool | None = True, disk_cache_dir: str | None = DISK_CACHE_DIR, disk_size_limit_bytes: int | None = DISK_CACHE_LIMIT, memory_max_entries: int | None = 1000000, ): """Configure the cache for DSPy. Args: enable_disk_cache: Whether to enable on-disk cache. enable_memory_cache: Whether to enable in-memory cache. disk_cache_dir: The directory to store the on-disk cache. disk_size_limit_bytes: The size limit of the on-disk cache. memory_max_entries: The maximum number of entries in the in-memory cache. """ DSPY_CACHE = Cache( enable_disk_cache, enable_memory_cache, disk_cache_dir, disk_size_limit_bytes, memory_max_entries, ) import dspy # Update the reference to point to the new cache dspy.cache = DSPY_CACHE litellm.telemetry = False litellm.cache = None # By default we disable LiteLLM cache and use DSPy on-disk cache. def _get_dspy_cache(): disk_cache_dir = os.environ.get("DSPY_CACHEDIR") or os.path.join(Path.home(), ".dspy_cache") disk_cache_limit = int(os.environ.get("DSPY_CACHE_LIMIT", 3e10)) try: _dspy_cache = Cache( enable_disk_cache=True, enable_memory_cache=True, disk_cache_dir=disk_cache_dir, disk_size_limit_bytes=disk_cache_limit, memory_max_entries=1000000, ) except Exception as e: # If cache creation fails (e.g., in AWS Lambda), create a memory-only cache logger.warning("Failed to initialize disk cache, falling back to memory-only cache: %s", e) _dspy_cache = Cache( enable_disk_cache=False, enable_memory_cache=True, disk_cache_dir=disk_cache_dir, disk_size_limit_bytes=disk_cache_limit, memory_max_entries=1000000, ) return _dspy_cache DSPY_CACHE = _get_dspy_cache() if "LITELLM_LOCAL_MODEL_COST_MAP" not in os.environ: # Accessed at run time by litellm; i.e., fine to keep after import os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" def configure_litellm_logging(level: str = "ERROR"): """Configure LiteLLM logging to the specified level.""" # Litellm uses a global logger called `verbose_logger` to control all loggings. 
from litellm._logging import verbose_logger numeric_logging_level = getattr(logging, level) verbose_logger.setLevel(numeric_logging_level) for h in verbose_logger.handlers: h.setLevel(numeric_logging_level) def enable_litellm_logging(): litellm.suppress_debug_info = False configure_litellm_logging("DEBUG") def disable_litellm_logging(): litellm.suppress_debug_info = True configure_litellm_logging("ERROR") # By default, we disable LiteLLM logging for clean logging disable_litellm_logging() __all__ = [ "BaseLM", "LM", "Provider", "TrainingJob", "inspect_history", "Embedder", "enable_litellm_logging", "disable_litellm_logging", "configure_cache", ] ``` -------------------------------------------------------------------------------- /tests/adapters/test_adapter_utils.py: -------------------------------------------------------------------------------- ```python # ruff: noqa: UP007 from typing import Literal, Optional, Union import pytest from pydantic import BaseModel from dspy.adapters.utils import parse_value class Profile(BaseModel): name: str age: int def test_parse_value_str_annotation(): # Test basic string conversion assert parse_value(123, str) == "123" assert parse_value(True, str) == "True" assert parse_value("hello", str) == "hello" assert parse_value(None, str) == "None" assert parse_value([1, 2, 3], str) == "[1, 2, 3]" def test_parse_value_pydantic_types(): # Test with pydantic BaseModel - JSON string input json_str = '{"name": "John", "age": 30}' result = parse_value(json_str, Profile) assert isinstance(result, Profile) assert result.name == "John" assert result.age == 30 # Test with pydantic BaseModel - dict input dict_input = {"name": "Jane", "age": 25} result = parse_value(dict_input, Profile) assert isinstance(result, Profile) assert result.name == "Jane" assert result.age == 25 # Test with invalid pydantic data with pytest.raises(Exception): parse_value('{"name": "John"}', Profile) # missing required age field def test_parse_value_basic_types(): # Test int assert parse_value("42", int) == 42 assert parse_value(42, int) == 42 # Test float assert parse_value("3.14", float) == 3.14 assert parse_value(3.14, float) == 3.14 # Test bool assert parse_value("true", bool) is True assert parse_value(True, bool) is True assert parse_value("false", bool) is False # Test list assert parse_value("[1, 2, 3]", list[int]) == [1, 2, 3] assert parse_value([1, 2, 3], list[int]) == [1, 2, 3] def test_parse_value_literal(): # Test Literal type assert parse_value("option1", Literal["option1", "option2"]) == "option1" assert parse_value("option2", Literal["option1", "option2"]) == "option2" # Test Literal with quotes and prefixes assert parse_value("'option1'", Literal["option1", "option2"]) == "option1" assert parse_value('"option1"', Literal["option1", "option2"]) == "option1" assert parse_value("Literal[option1]", Literal["option1", "option2"]) == "option1" assert parse_value("str[option1]", Literal["option1", "option2"]) == "option1" # Test invalid literal with pytest.raises(ValueError): parse_value("invalid", Literal["option1", "option2"]) def test_parse_value_union(): # Test Union with None (Optional) assert parse_value("test", Optional[str]) == "test" assert parse_value("test", str | None) == "test" assert parse_value("5", int | None) == 5 assert parse_value(None, Optional[str]) is None assert parse_value("text with [placeholder]", Optional[str]) == "text with [placeholder]" assert parse_value("text with [placeholder]", str | None) == "text with [placeholder]" # Test Union fallback to str 
assert parse_value("fallback", Union[int, str, None]) == "fallback" assert parse_value(5, Union[int, str, None]) == 5 assert parse_value("fallback", int | str | None) == "fallback" assert parse_value(5, int | str | None) == 5 assert parse_value("text with [placeholder]", Union[int, str, None]) == "text with [placeholder]" def test_parse_value_json_repair(): # Test cases where json_repair is needed assert parse_value('{"key": "value"}', dict) == {"key": "value"} # Test ast.literal_eval fallback assert parse_value("{'key': 'value'}", dict) == {"key": "value"} # Test fallback to original value when parsing fails malformed = "not json or literal" with pytest.raises(Exception): parse_value(malformed, dict) ``` -------------------------------------------------------------------------------- /dspy/adapters/types/document.py: -------------------------------------------------------------------------------- ```python from typing import Any, Literal import pydantic from dspy.adapters.types.base_type import Type from dspy.utils.annotation import experimental @experimental(version="3.0.4") class Document(Type): """A document type for providing content that can be cited by language models. This type represents documents that can be passed to language models for citation-enabled responses, particularly useful with Anthropic's Citations API. Documents include the content and metadata that helps the LM understand and reference the source material. Attributes: data: The text content of the document title: Optional title for the document (used in citations) media_type: MIME type of the document content (defaults to "text/plain") context: Optional context information about the document Example: ```python import dspy from dspy.signatures import Signature from dspy.experimental import Document, Citations class AnswerWithSources(Signature): '''Answer questions using provided documents with citations.''' documents: list[Document] = dspy.InputField() question: str = dspy.InputField() answer: str = dspy.OutputField() citations: Citations = dspy.OutputField() # Create documents docs = [ Document( data="The Earth orbits the Sun in an elliptical path.", title="Basic Astronomy Facts" ), Document( data="Water boils at 100°C at standard atmospheric pressure.", title="Physics Fundamentals", ) ] # Use with a citation-supporting model lm = dspy.LM("anthropic/claude-opus-4-1-20250805") predictor = dspy.Predict(AnswerWithSources) result = predictor(documents=docs, question="What temperature does water boil?", lm=lm) print(result.citations) ``` """ data: str title: str | None = None media_type: Literal["text/plain", "application/pdf"] = "text/plain" context: str | None = None def format(self) -> list[dict[str, Any]]: """Format document for LM consumption. Returns: A list containing the document block in the format expected by citation-enabled language models. """ document_block = { "type": "document", "source": { "type": "text", "media_type": self.media_type, "data": self.data }, "citations": {"enabled": True} } if self.title: document_block["title"] = self.title if self.context: document_block["context"] = self.context return [document_block] @classmethod def description(cls) -> str: """Description of the document type for use in prompts.""" return ( "A document containing text content that can be referenced and cited. " "Include the full text content and optionally a title for proper referencing." 
) @pydantic.model_validator(mode="before") @classmethod def validate_input(cls, data: Any): if isinstance(data, cls): return data # Handle case where data is just a string (data only) if isinstance(data, str): return {"data": data} # Handle case where data is a dict elif isinstance(data, dict): return data raise ValueError(f"Received invalid value for `Document`: {data}") def __str__(self) -> str: """String representation showing title and content length.""" title_part = f"'{self.title}': " if self.title else "" return f"Document({title_part}{len(self.data)} chars)" ``` -------------------------------------------------------------------------------- /dspy/adapters/types/code.py: -------------------------------------------------------------------------------- ```python import re from typing import Any, ClassVar import pydantic from pydantic import create_model from dspy.adapters.types.base_type import Type class Code(Type): """Code type in DSPy. This type is useful for code generation and code analysis. Example 1: dspy.Code as output type in code generation: ```python import dspy dspy.configure(lm=dspy.LM("openai/gpt-4o-mini")) class CodeGeneration(dspy.Signature): '''Generate python code to answer the question.''' question: str = dspy.InputField(description="The question to answer") code: dspy.Code["java"] = dspy.OutputField(description="The code to execute") predict = dspy.Predict(CodeGeneration) result = predict(question="Given an array, find if any of the two numbers sum up to 10") print(result.code) ``` Example 2: dspy.Code as input type in code analysis: ```python import dspy import inspect dspy.configure(lm=dspy.LM("openai/gpt-4o-mini")) class CodeAnalysis(dspy.Signature): '''Analyze the time complexity of the function.''' code: dspy.Code["python"] = dspy.InputField(description="The function to analyze") result: str = dspy.OutputField(description="The time complexity of the function") predict = dspy.Predict(CodeAnalysis) def sleepsort(x): import time for i in x: time.sleep(i) print(i) result = predict(code=inspect.getsource(sleepsort)) print(result.result) ``` """ code: str language: ClassVar[str] = "python" def format(self): return f"{self.code}" @pydantic.model_serializer() def serialize_model(self): """Override to bypass the <<CUSTOM-TYPE-START-IDENTIFIER>> and <<CUSTOM-TYPE-END-IDENTIFIER>> tags.""" return self.format() @classmethod def description(cls) -> str: return ( "Code represented in a string, specified in the `code` field. If this is an output field, the code " "field should follow the markdown code block format, e.g. 
\n```python\n{code}\n``` or \n```cpp\n{code}\n```" f"\nProgramming language: {cls.language}" ) @pydantic.model_validator(mode="before") @classmethod def validate_input(cls, data: Any): if isinstance(data, cls): return data if isinstance(data, str): return {"code": _filter_code(data)} if isinstance(data, dict): if "code" not in data: raise ValueError("`code` field is required for `dspy.Code`") if not isinstance(data["code"], str): raise ValueError(f"`code` field must be a string, but received type: {type(data['code'])}") return {"code": _filter_code(data["code"])} raise ValueError(f"Received invalid value for `dspy.Code`: {data}") def _filter_code(code: str) -> str: """Extract code from markdown code blocks, stripping any language identifier.""" # Case 1: format like: # ```python # {code_block} # ``` regex_pattern = r"```(?:[^\n]*)\n(.*?)```" match = re.search(regex_pattern, code, re.DOTALL) if match: return match.group(1).strip() # Case 2: ```<code>``` (no language, single-line) regex_pattern_simple = r"```(.*?)```" match = re.search(regex_pattern_simple, code, re.DOTALL) if match: return match.group(1).strip() # Fallback case return code # Patch __class_getitem__ directly on the class to support dspy.Code["python"] syntax def _code_class_getitem(cls, language): code_with_language_cls = create_model(f"{cls.__name__}_{language}", __base__=cls) code_with_language_cls.language = language return code_with_language_cls Code.__class_getitem__ = classmethod(_code_class_getitem) ``` -------------------------------------------------------------------------------- /docs/docs/deep-dive/data-handling/examples.md: -------------------------------------------------------------------------------- ```markdown --- sidebar_position: 1 --- !!! warning "This page is outdated and may not be fully accurate in DSPy 2.5" # Examples in DSPy Working in DSPy involves training sets, development sets, and test sets. This is like traditional ML, but you usually need far fewer labels (or zero labels) to use DSPy effectively. The core data type for data in DSPy is `Example`. You will use **Examples** to represent items in your training set and test set. DSPy **Examples** are similar to Python `dict`s but have a few useful utilities. Your DSPy modules will return values of the type `Prediction`, which is a special sub-class of `Example`. ## Creating an `Example` When you use DSPy, you will do a lot of evaluation and optimization runs. Your individual datapoints will be of type `Example`: ```python qa_pair = dspy.Example(question="This is a question?", answer="This is an answer.") print(qa_pair) print(qa_pair.question) print(qa_pair.answer) ``` **Output:** ```text Example({'question': 'This is a question?', 'answer': 'This is an answer.'}) (input_keys=None) This is a question? This is an answer. ``` Examples can have any field keys and any value types, though usually values are strings. ```text object = Example(field1=value1, field2=value2, field3=value3, ...) ``` ## Specifying Input Keys In traditional ML, there are separated "inputs" and "labels". In DSPy, the `Example` objects have a `with_inputs()` method, which can mark specific fields as inputs. (The rest are just metadata or labels.) ```python # Single Input. print(qa_pair.with_inputs("question")) # Multiple Inputs; be careful about marking your labels as inputs unless you mean it. print(qa_pair.with_inputs("question", "answer")) ``` This flexibility allows for customized tailoring of the `Example` object for different DSPy scenarios. 
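To see why marking inputs matters downstream, here is a minimal sketch (the LM name, field names, and metric below are illustrative assumptions, and running it requires a configured API key): DSPy programs are called with only the fields marked as inputs, while metrics receive the full example, labels included, alongside the prediction.

```python
import dspy

# Assumed model name; requires an OPENAI_API_KEY to actually run.
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))

trainset = [
    dspy.Example(question="What is 2 + 2?", answer="4").with_inputs("question"),
    dspy.Example(question="What is the capital of France?", answer="Paris").with_inputs("question"),
]

def exact_match(example, pred, trace=None):
    # Metrics see the whole example, so labels like `answer` are available here.
    return example.answer.lower() == pred.answer.lower()

predict = dspy.Predict("question -> answer")

for ex in trainset:
    # The program is called with only the fields marked as inputs.
    prediction = predict(**ex.inputs())
    print(exact_match(ex, prediction))
```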
When you call `with_inputs()`, you get a new copy of the example; the original object is left unchanged.

## Element Access and Updates

Values can be accessed using the `.` (dot) operator. For example, you can access the value of the key `name` in the object `Example(name="John Doe", job="sleep")` through `object.name`.

To access or exclude certain keys, use the `inputs()` and `labels()` methods, which return new Example objects containing only the input or the non-input keys, respectively.

```python
article_summary = dspy.Example(article="This is an article.", summary="This is a summary.").with_inputs("article")

input_key_only = article_summary.inputs()
non_input_key_only = article_summary.labels()

print("Example object with Input fields only:", input_key_only)
print("Example object with Non-Input fields only:", non_input_key_only)
```

**Output**
```
Example object with Input fields only: Example({'article': 'This is an article.'}) (input_keys=None)
Example object with Non-Input fields only: Example({'summary': 'This is a summary.'}) (input_keys=None)
```

To exclude keys, use `without()`:

```python
article_summary = dspy.Example(context="This is an article.", question="This is a question?", answer="This is an answer.", rationale="This is a rationale.").with_inputs("context", "question")

print("Example object without answer & rationale keys:", article_summary.without("answer", "rationale"))
```

**Output**
```
Example object without answer & rationale keys: Example({'context': 'This is an article.', 'question': 'This is a question?'}) (input_keys=None)
```

Updating a value is as simple as assigning to it with the `.` operator:

```python
article_summary.context = "new context"
```

## Iterating over Example

Iterating over an `Example` also works like a dictionary, supporting methods such as `keys()`, `values()`, and `items()`:

```python
for k, v in article_summary.items():
    print(f"{k} = {v}")
```

**Output**

```text
context = This is an article.
question = This is a question?
answer = This is an answer.
rationale = This is a rationale.
```
```

--------------------------------------------------------------------------------
/tests/reliability/complex_types/generated/test_nesting_2/schema.json:
--------------------------------------------------------------------------------

```json
{
  "description": "This AI program is designed to process complex datasets with multiple nested input fields and produce structured output fields. It can handle cases where nested fields have the same name but different types, ensuring that the data is accurately processed and transformed.
The program is particularly useful for applications that require detailed data analysis, integration of multiple data sources, and handling of heterogeneous data types.", "properties": { "customer": { "properties": { "customer_id": { "description": "Unique identifier for the customer", "type": "string" }, "customer_type": { "description": "Indicates if the customer is a premium member", "type": "boolean" }, "details": { "properties": { "age": { "description": "Customer's age", "type": "integer" }, "value": { "description": "Customer's value category", "type": "string" } }, "required": ["value", "age"], "type": "object" } }, "required": ["customer_id", "customer_type", "details"], "type": "object" }, "customer_summary": { "properties": { "customer_id": { "description": "Unique identifier for the customer", "type": "string" }, "customer_type": { "properties": { "category": { "description": "Customer's membership category", "type": "string" }, "is_premium": { "description": "Indicates if the customer is a premium member", "type": "boolean" } }, "required": ["is_premium", "category"], "type": "object" }, "value": { "description": "Customer's value category", "type": "string" } }, "required": ["customer_id", "customer_type", "value"], "type": "object" }, "transaction": { "properties": { "amount": { "description": "Transaction amount", "type": "number" }, "details": { "properties": { "timestamp": { "description": "Timestamp of the transaction", "format": "date-time", "type": "string" }, "value": { "description": "Monetary value of the transaction", "type": "number" } }, "required": ["value", "timestamp"], "type": "object" }, "transaction_id": { "description": "Unique identifier for the transaction", "type": "string" } }, "required": ["transaction_id", "amount", "details"], "type": "object" }, "transaction_summary": { "properties": { "details": { "properties": { "timestamp": { "description": "Timestamp of the transaction", "format": "date-time", "type": "string" }, "value": { "description": "Monetary value of the transaction", "type": "number" } }, "required": ["value", "timestamp"], "type": "object" }, "total_amount": { "description": "Total transaction amount", "type": "number" }, "transaction_id": { "description": "Unique identifier for the transaction", "type": "string" } }, "required": ["transaction_id", "total_amount", "details"], "type": "object" } }, "required": [ "customer", "transaction", "customer_summary", "transaction_summary" ], "type": "object" } ``` -------------------------------------------------------------------------------- /tests/clients/test_embedding.py: -------------------------------------------------------------------------------- ```python from unittest.mock import patch import numpy as np import pytest import dspy from dspy.clients.embedding import Embedder # Mock response format similar to litellm's embedding response. 
class MockEmbeddingResponse: def __init__(self, embeddings): self.data = [{"embedding": emb} for emb in embeddings] self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} self.model = "mock_model" self.object = "list" @pytest.fixture def cache(tmp_path): original_cache = dspy.cache dspy.configure_cache(disk_cache_dir=tmp_path / ".dspy_cache") yield dspy.cache = original_cache def test_litellm_embedding(cache): model = "text-embedding-ada-002" inputs = ["hello", "world"] mock_embeddings = [ [0.1, 0.2, 0.3], # embedding for "hello" [0.4, 0.5, 0.6], # embedding for "world" ] with patch("litellm.embedding") as mock_litellm: # Configure mock to return proper response format. mock_litellm.return_value = MockEmbeddingResponse(mock_embeddings) # Create embedding instance and call it. embedding = Embedder(model, caching=True) result = embedding(inputs) # Verify litellm was called with correct parameters. # Because we disable the litellm cache, it should be called with caching=False. mock_litellm.assert_called_once_with(model=model, input=inputs, caching=False) assert len(result) == len(inputs) np.testing.assert_allclose(result, mock_embeddings) # Second call should be cached. result = embedding(inputs) assert mock_litellm.call_count == 1 np.testing.assert_allclose(result, mock_embeddings) # Disable cache should issue new calls. embedding = Embedder(model, caching=False) result = embedding(inputs) assert mock_litellm.call_count == 2 np.testing.assert_allclose(result, mock_embeddings) def test_callable_embedding(cache): inputs = ["hello", "world", "test"] expected_embeddings = [ [0.1, 0.2, 0.3], # embedding for "hello" [0.4, 0.5, 0.6], # embedding for "world" [0.7, 0.8, 0.9], # embedding for "test" ] class EmbeddingFn: def __init__(self): self.call_count = 0 def __call__(self, texts): # Simple callable that returns random embeddings. self.call_count += 1 return expected_embeddings embedding_fn = EmbeddingFn() # Create embedding instance with callable embedding = Embedder(embedding_fn) result = embedding(inputs) assert embedding_fn.call_count == 1 np.testing.assert_allclose(result, expected_embeddings) result = embedding(inputs) # The second call should be cached. assert embedding_fn.call_count == 1 np.testing.assert_allclose(result, expected_embeddings) def test_invalid_model_type(): # Test that invalid model type raises ValueError with pytest.raises(ValueError): embedding = Embedder(123) # Invalid model type embedding(["test"]) @pytest.mark.asyncio async def test_async_embedding(): model = "text-embedding-ada-002" inputs = ["hello", "world"] mock_embeddings = [ [0.1, 0.2, 0.3], # embedding for "hello" [0.4, 0.5, 0.6], # embedding for "world" ] with patch("litellm.aembedding") as mock_litellm: # Configure mock to return proper response format. mock_litellm.return_value = MockEmbeddingResponse(mock_embeddings) # Create embedding instance and call it. embedding = Embedder(model, caching=False) result = await embedding.acall(inputs) # Verify litellm was called with correct parameters. 
mock_litellm.assert_called_once_with(model=model, input=inputs, caching=False) assert len(result) == len(inputs) np.testing.assert_allclose(result, mock_embeddings) ``` -------------------------------------------------------------------------------- /dspy/datasets/dataset.py: -------------------------------------------------------------------------------- ```python import random import uuid from dspy import Example from dspy.dsp.utils import dotdict class Dataset: def __init__(self, train_seed=0, train_size=None, eval_seed=0, dev_size=None, test_size=None, input_keys=None): self.train_size = train_size self.train_seed = train_seed self.dev_size = dev_size self.dev_seed = eval_seed self.test_size = test_size self.test_seed = eval_seed self.input_keys = input_keys or [] self.do_shuffle = True self.name = self.__class__.__name__ def reset_seeds(self, train_seed=None, train_size=None, eval_seed=None, dev_size=None, test_size=None): self.train_size = train_size or self.train_size self.train_seed = train_seed or self.train_seed self.dev_size = dev_size or self.dev_size self.dev_seed = eval_seed or self.dev_seed self.test_size = test_size or self.test_size self.test_seed = eval_seed or self.test_seed if hasattr(self, "_train_"): del self._train_ if hasattr(self, "_dev_"): del self._dev_ if hasattr(self, "_test_"): del self._test_ @property def train(self): if not hasattr(self, "_train_"): self._train_ = self._shuffle_and_sample("train", self._train, self.train_size, self.train_seed) return self._train_ @property def dev(self): if not hasattr(self, "_dev_"): self._dev_ = self._shuffle_and_sample("dev", self._dev, self.dev_size, self.dev_seed) return self._dev_ @property def test(self): if not hasattr(self, "_test_"): self._test_ = self._shuffle_and_sample("test", self._test, self.test_size, self.test_seed) return self._test_ def _shuffle_and_sample(self, split, data, size, seed=0): data = list(data) # Shuffle the data irrespective of the requested size. base_rng = random.Random(seed) if self.do_shuffle: base_rng.shuffle(data) data = data[:size] output = [] for example in data: example_obj = Example(**example, dspy_uuid=str(uuid.uuid4()), dspy_split=split) if self.input_keys: example_obj = example_obj.with_inputs(*self.input_keys) output.append(example_obj) # TODO: NOTE: Ideally we use these uuids for dedup internally, for demos and internal train/val splits. # Now, some tasks (like convQA and Colors) have overlapping examples. Here, we should allow the user to give us # a uuid field that would respect this in some way. This means that we need a more refined concept that # uuid (each example is unique) and more like a group_uuid. 
return output @classmethod def prepare_by_seed( cls, train_seeds=None, train_size=16, dev_size=1000, divide_eval_per_seed=True, eval_seed=2023, **kwargs, ): train_seeds = train_seeds or [1, 2, 3, 4, 5] data_args = dotdict(train_size=train_size, eval_seed=eval_seed, dev_size=dev_size, test_size=0, **kwargs) dataset = cls(**data_args) eval_set = dataset.dev eval_sets, train_sets = [], [] examples_per_seed = dev_size // len(train_seeds) if divide_eval_per_seed else dev_size eval_offset = 0 for train_seed in train_seeds: data_args.train_seed = train_seed dataset.reset_seeds(**data_args) eval_sets.append(eval_set[eval_offset : eval_offset + examples_per_seed]) train_sets.append(dataset.train) assert len(eval_sets[-1]) == examples_per_seed, len(eval_sets[-1]) assert len(train_sets[-1]) == train_size, len(train_sets[-1]) if divide_eval_per_seed: eval_offset += examples_per_seed return dotdict(train_sets=train_sets, eval_sets=eval_sets) ``` -------------------------------------------------------------------------------- /dspy/adapters/xml_adapter.py: -------------------------------------------------------------------------------- ```python import re from typing import Any from pydantic.fields import FieldInfo from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName from dspy.adapters.utils import format_field_value, translate_field_type from dspy.signatures.signature import Signature from dspy.utils.callback import BaseCallback class XMLAdapter(ChatAdapter): def __init__(self, callbacks: list[BaseCallback] | None = None): super().__init__(callbacks) self.field_pattern = re.compile(r"<(?P<name>\w+)>((?P<content>.*?))</\1>", re.DOTALL) def format_field_with_value(self, fields_with_values: dict[FieldInfoWithName, Any]) -> str: output = [] for field, field_value in fields_with_values.items(): formatted = format_field_value(field_info=field.info, value=field_value) output.append(f"<{field.name}>\n{formatted}\n</{field.name}>") return "\n\n".join(output).strip() def format_field_structure(self, signature: type[Signature]) -> str: """ XMLAdapter requires input and output fields to be wrapped in XML tags like `<field_name>`. """ parts = [] parts.append("All interactions will be structured in the following way, with the appropriate values filled in.") def format_signature_fields_for_instructions(fields: dict[str, FieldInfo]): return self.format_field_with_value( fields_with_values={ FieldInfoWithName(name=field_name, info=field_info): translate_field_type(field_name, field_info) for field_name, field_info in fields.items() }, ) parts.append(format_signature_fields_for_instructions(signature.input_fields)) parts.append(format_signature_fields_for_instructions(signature.output_fields)) return "\n\n".join(parts).strip() def format_assistant_message_content( self, signature: type[Signature], outputs: dict[str, Any], missing_field_message=None, ) -> str: return self.format_field_with_value( { FieldInfoWithName(name=k, info=v): outputs.get(k, missing_field_message) for k, v in signature.output_fields.items() }, ) def user_message_output_requirements(self, signature: type[Signature]) -> str: message = "Respond with the corresponding output fields wrapped in XML tags " message += ", then ".join(f"`<{f}>`" for f in signature.output_fields) message += "." 
return message def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]: fields = {} for match in self.field_pattern.finditer(completion): name = match.group("name") content = match.group("content").strip() if name in signature.output_fields and name not in fields: fields[name] = content # Cast values using base class parse_value helper for k, v in fields.items(): fields[k] = self._parse_field_value(signature.output_fields[k], v, completion, signature) if fields.keys() != signature.output_fields.keys(): from dspy.utils.exceptions import AdapterParseError raise AdapterParseError( adapter_name="XMLAdapter", signature=signature, lm_response=completion, parsed_result=fields, ) return fields def _parse_field_value(self, field_info, raw, completion, signature): from dspy.adapters.utils import parse_value try: return parse_value(raw, field_info.annotation) except Exception as e: from dspy.utils.exceptions import AdapterParseError raise AdapterParseError( adapter_name="XMLAdapter", signature=signature, lm_response=completion, message=f"Failed to parse field {field_info} with value {raw}: {e}", ) ``` -------------------------------------------------------------------------------- /docs/scripts/generate_api_summary.py: -------------------------------------------------------------------------------- ```python from pathlib import Path INDEX_NAME = { "models": "Models", "primitives": "Primitives", "signatures": "Signatures", "adapters": "Adapters", "modules": "Modules", "evaluation": "Evaluation", "optimizers": "Optimizers", "utils": "Utils", "tools": "Tools", "experimental": "Experimental", } def build_nav_structure(directory: Path, base_path: Path) -> dict: """Recursively build navigation structure for a directory.""" nav = {} # Get all items in current directory items = sorted(directory.iterdir()) for path in items: if path.suffix == ".md": name = path.stem nav[name] = str(path.relative_to(base_path)) elif path.is_dir() and path.name == "GEPA": nav["GEPA"] = { "2. GEPA Advanced": "api/optimizers/GEPA/GEPA_Advanced.md", "1. GEPA Overview": "api/optimizers/GEPA/overview.md", } return nav def format_nav_section(nav_dict, indent_level=2): """Convert dictionary to properly indented nav section""" lines = [] indent = " " * indent_level module_navs = [] file_navs = [] for key, value in sorted(nav_dict.items()): if isinstance(value, dict): # This is a section module_navs.append(f"{indent}- {key}:") module_navs.extend(format_nav_section(value, indent_level + 1)) else: # This is a file file_navs.append(f"{indent}- {key}: {value}") # Put submodules' nav items before file nav items. e.g., `dspy.evaluate` before `dspy.ChainOfThought` # in the nav section. 
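    # For illustration (hypothetical entries): a category like "Modules" becomes a
    # "- Modules:" section line in the loop above, while files become entries such
    # as "- ChainOfThought: api/modules/ChainOfThought.md", indented one level deeper.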
lines.extend(module_navs) lines.extend(file_navs) return lines def read_mkdocs_sections(filename: str = "mkdocs.yml"): """Read and parse the mkdocs.yml file into sections.""" with open(filename, "r") as f: lines = f.readlines() nav_start = -1 theme_start = -1 # Find section boundaries for i, line in enumerate(lines): if line.strip() == "nav:": nav_start = i elif line.strip() == "theme:": theme_start = i break # Split content into sections pre_nav = lines[: nav_start + 1] # Include the 'nav:' line nav_content = [] post_theme = lines[theme_start:] # Start from 'theme:' line # Extract nav content excluding API Reference i = nav_start + 1 while i < theme_start: line = lines[i] if line.strip() == "- API Reference:": # Skip this line and all indented lines that follow i += 1 while i < theme_start and (not lines[i].strip() or lines[i].startswith(" " * 8)): i += 1 else: nav_content.append(line) i += 1 return pre_nav, nav_content, post_theme def generate_api_nav(): """Generate the API navigation structure.""" api_nav = {} api_path = Path("docs/api") for dir_path in sorted(api_path.iterdir()): if dir_path.is_dir(): category = INDEX_NAME[dir_path.name] api_nav[category] = build_nav_structure(dir_path, Path("docs")) return api_nav def main(): """Main function to generate the API documentation summary.""" # Read existing mkdocs.yml sections pre_nav, nav_content, post_theme = read_mkdocs_sections() # Generate API navigation structure api_nav = generate_api_nav() # Create API section api_section = [" - API Reference:"] api_section.append(" - API Reference: api/index.md") api_section.extend(format_nav_section(api_nav)) api_section.append("") # Add empty line before theme section # Write back to mkdocs.yml with open("mkdocs.yml", "w") as f: # Write pre-nav content f.writelines(pre_nav) # Write nav content f.writelines(nav_content) # Add API section f.write("\n".join(api_section) + "\n") # Write post-theme content f.writelines(post_theme) if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /dspy/primitives/example.py: -------------------------------------------------------------------------------- ```python class Example: def __init__(self, base=None, **kwargs): # Internal storage and other attributes self._store = {} self._demos = [] self._input_keys = None # Initialize from a base Example if provided if base and isinstance(base, type(self)): self._store = base._store.copy() # Initialize from a dict if provided elif base and isinstance(base, dict): self._store = base.copy() # Update with provided kwargs self._store.update(kwargs) def __getattr__(self, key): if key.startswith("__") and key.endswith("__"): raise AttributeError if key in self._store: return self._store[key] raise AttributeError(f"'{type(self).__name__}' object has no attribute '{key}'") def __setattr__(self, key, value): if key.startswith("_") or key in dir(self.__class__): super().__setattr__(key, value) else: self._store[key] = value def __getitem__(self, key): return self._store[key] def __setitem__(self, key, value): self._store[key] = value def __delitem__(self, key): del self._store[key] def __contains__(self, key): return key in self._store def __len__(self): return len([k for k in self._store if not k.startswith("dspy_")]) def __repr__(self): # return f"Example({self._store})" + f" (input_keys={self._input_keys}, demos={self._demos})" d = {k: v for k, v in self._store.items() if not k.startswith("dspy_")} return f"Example({d})" + f" (input_keys={self._input_keys})" def 
__str__(self): return self.__repr__() def __eq__(self, other): return isinstance(other, Example) and self._store == other._store def __hash__(self): return hash(tuple(self._store.items())) def keys(self, include_dspy=False): return [k for k in self._store.keys() if not k.startswith("dspy_") or include_dspy] def values(self, include_dspy=False): return [v for k, v in self._store.items() if not k.startswith("dspy_") or include_dspy] def items(self, include_dspy=False): return [(k, v) for k, v in self._store.items() if not k.startswith("dspy_") or include_dspy] def get(self, key, default=None): return self._store.get(key, default) def with_inputs(self, *keys): copied = self.copy() copied._input_keys = set(keys) return copied def inputs(self): if self._input_keys is None: raise ValueError("Inputs have not been set for this example. Use `example.with_inputs()` to set them.") # return items that are in input_keys d = {key: self._store[key] for key in self._store if key in self._input_keys} # return type(self)(d) new_instance = type(self)(base=d) new_instance._input_keys = self._input_keys # Preserve input_keys in new instance return new_instance def labels(self): # return items that are NOT in input_keys input_keys = self.inputs().keys() d = {key: self._store[key] for key in self._store if key not in input_keys} return type(self)(d) def __iter__(self): return iter(dict(self._store)) def copy(self, **kwargs): return type(self)(base=self, **kwargs) def without(self, *keys): copied = self.copy() for key in keys: del copied[key] return copied def toDict(self): # noqa: N802 def convert_to_serializable(value): if hasattr(value, "toDict"): return value.toDict() elif isinstance(value, list): return [convert_to_serializable(item) for item in value] elif isinstance(value, dict): return {k: convert_to_serializable(v) for k, v in value.items()} else: return value serializable_store = {} for k, v in self._store.items(): serializable_store[k] = convert_to_serializable(v) return serializable_store ``` -------------------------------------------------------------------------------- /tests/signatures/test_custom_types.py: -------------------------------------------------------------------------------- ```python import pydantic import pytest import dspy from dspy import Signature def test_basic_custom_type_resolution(): """Test basic custom type resolution with both explicit and automatic mapping.""" class CustomType(pydantic.BaseModel): value: str # Custom types can be explicitly mapped explicit_sig = Signature( "input: CustomType -> output: str", custom_types={"CustomType": CustomType} ) assert explicit_sig.input_fields["input"].annotation == CustomType # Custom types can also be auto-resolved from caller's scope auto_sig = Signature("input: CustomType -> output: str") assert auto_sig.input_fields["input"].annotation == CustomType def test_type_alias_for_nested_types(): """Test using type aliases for nested types.""" class Container: class NestedType(pydantic.BaseModel): value: str NestedType = Container.NestedType alias_sig = Signature("input: str -> output: NestedType") assert alias_sig.output_fields["output"].annotation == Container.NestedType class Container2: class Query(pydantic.BaseModel): text: str class Score(pydantic.BaseModel): score: float signature = dspy.Signature("query: Container2.Query -> score: Container2.Score") assert signature.output_fields["score"].annotation == Container2.Score class GlobalCustomType(pydantic.BaseModel): """A type defined at module level for testing module-level 
resolution.""" value: str notes: str = "" def test_module_level_type_resolution(): """Test resolution of types defined at module level.""" # Module-level types can be auto-resolved sig = Signature("name: str -> result: GlobalCustomType") assert sig.output_fields["result"].annotation == GlobalCustomType # Create module-level nested class for testing class OuterContainer: class InnerType(pydantic.BaseModel): name: str value: int def test_recommended_patterns(): """Test recommended patterns for working with custom types in signatures.""" # PATTERN 1: Local type with auto-resolution class LocalType(pydantic.BaseModel): value: str sig1 = Signature("input: str -> output: LocalType") assert sig1.output_fields["output"].annotation == LocalType # PATTERN 2: Module-level type with auto-resolution sig2 = Signature("input: str -> output: GlobalCustomType") assert sig2.output_fields["output"].annotation == GlobalCustomType # PATTERN 3: Nested type with dot notation sig3 = Signature("input: str -> output: OuterContainer.InnerType") assert sig3.output_fields["output"].annotation == OuterContainer.InnerType # PATTERN 4: Nested type using alias InnerTypeAlias = OuterContainer.InnerType sig4 = Signature("input: str -> output: InnerTypeAlias") assert sig4.output_fields["output"].annotation == InnerTypeAlias # PATTERN 5: Nested type with dot notation sig5 = Signature("input: str -> output: OuterContainer.InnerType") assert sig5.output_fields["output"].annotation == OuterContainer.InnerType def test_expected_failure(): # InnerType DNE when not OuterContainer.InnerTypes, so this type shouldnt be resolved with pytest.raises(ValueError): Signature("input: str -> output: InnerType") def test_module_type_resolution(): class TestModule(dspy.Module): def __init__(self): super().__init__() self.predict = dspy.Predict("input: str -> output: OuterContainer.InnerType") def predict(self, input: str) -> str: return input module = TestModule() sig = module.predict.signature assert sig.output_fields["output"].annotation == OuterContainer.InnerType def test_basic_custom_type_resolution(): class CustomType(pydantic.BaseModel): value: str sig = Signature("input: CustomType -> output: str", custom_types={"CustomType": CustomType}) assert sig.input_fields["input"].annotation == CustomType sig = Signature("input: CustomType -> output: str") assert sig.input_fields["input"].annotation == CustomType ``` -------------------------------------------------------------------------------- /docs/overrides/home.html: -------------------------------------------------------------------------------- ```html {% extends "base.html" %} {% block content %} <style> .md-main__inner .md-grid { padding: 0; margin: 0; } .content-container { max-width: 100%; margin: 0; padding: 0; } .hero { text-align: center; padding: 4rem 2rem; margin: 0; background-color: #f5f6f77a; color: white; } .hero-logo { max-width: 15rem; height: auto; margin: 0 auto; } .hero-subtitle { font-size: 1.2rem; margin: 1.5rem 0; color: #e2e8f0; } .cta-button { display: inline-block; padding: 0.75rem 1.5rem; background-color: transparent; color: black; text-decoration: none; border-radius: 0.375rem; font-weight: 600; border: 2px solid black; transition: all 0.3s ease; } .cta-button:hover { background-color: white; color: black; border: 2px solid white; } .features-section { padding: 4rem 2rem; } .features-title { text-align: center; font-size: 2rem; font-weight: 700; margin-bottom: 3rem; color: #1a202c; } .features-grid { display: grid; grid-template-columns: repeat(auto-fit, 
minmax(300px, 1fr)); gap: 3rem; max-width: 1200px; margin: 0 auto; } .feature-card { text-align: center; padding: 1.5rem; } .feature-image { width: 10rem; height: auto; margin: 0 auto 1.5rem; } .feature-title { font-size: 1.25rem; font-weight: 700; margin-bottom: 1rem; color: #2d3748; } .feature-description { color: #4a5568; line-height: 1.5; } @media (max-width: 768px) { .hero { padding: 3rem 1rem; } .hero-logo { max-width: 10rem; } .features-grid { grid-template-columns: 1fr; gap: 2rem; } .feature-card { padding: 1rem; } } </style> <div class="content-container"> <div class="hero"> <img src="{{ 'static/img/dspy_logo.png' | url }}" alt="DSPy Logo" class="hero-logo"> <p class="hero-subtitle">Programming—not prompting—Language Models</p> <a href="{{ 'quick-start/getting-started-1' | url }}" class="cta-button">Get Started with DSPy</a> </div> <div class="features-section"> <h2 class="features-title">The Way of DSPy</h2> <div class="features-grid"> <div class="feature-card"> <img src="{{ 'static/img/optimize.png' | url }}" alt="Systematic Optimization" class="feature-image"> <h3 class="feature-title">Systematic Optimization</h3> <p class="feature-description">Choose from a range of optimizers to enhance your program. Whether it's generating refined instructions, or fine-tuning weights, DSPy's optimizers are engineered to maximize efficiency and effectiveness.</p> </div> <div class="feature-card"> <img src="{{ 'static/img/modular.png' | url }}" alt="Modular Approach" class="feature-image"> <h3 class="feature-title">Modular Approach</h3> <p class="feature-description">With DSPy, you can build your system using predefined modules, replacing intricate prompting techniques with straightforward, effective solutions.</p> </div> <div class="feature-card"> <img src="{{ 'static/img/universal_compatibility.png' | url }}" alt="Cross-LM Compatibility" class="feature-image"> <h3 class="feature-title">Cross-LM Compatibility</h3> <p class="feature-description">Whether you're working with powerhouse models like GPT-3.5 or GPT-4, or local models such as T5-base or Llama2-13b, DSPy seamlessly integrates and enhances their performance in your system.</p> </div> </div> </div> </div> {% endblock %} ``` -------------------------------------------------------------------------------- /docs/docs/api/optimizers/MIPROv2.md: -------------------------------------------------------------------------------- ```markdown # dspy.MIPROv2 `MIPROv2` (<u>M</u>ultiprompt <u>I</u>nstruction <u>PR</u>oposal <u>O</u>ptimizer Version 2) is an prompt optimizer capable of optimizing both instructions and few-shot examples jointly. It does this by bootstrapping few-shot example candidates, proposing instructions grounded in different dynamics of the task, and finding an optimized combination of these options using Bayesian Optimization. It can be used for optimizing few-shot examples & instructions jointly, or just instructions for 0-shot optimization. 
<!-- START_API_REF -->
::: dspy.MIPROv2
    handler: python
    options:
        members:
            - compile
            - get_params
        show_source: true
        show_root_heading: true
        heading_level: 2
        docstring_style: google
        show_root_full_path: true
        show_object_full_path: false
        separate_signature: false
        inherited_members: true
:::
<!-- END_API_REF -->

## Example Usage

The program below shows how to optimize a math program with MIPROv2:

```python
import dspy
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric

# Import the optimizer
from dspy.teleprompt import MIPROv2

# Initialize the LM
lm = dspy.LM('openai/gpt-4o-mini', api_key='YOUR_OPENAI_API_KEY')
dspy.configure(lm=lm)

# Initialize optimizer
teleprompter = MIPROv2(
    metric=gsm8k_metric,
    auto="medium",  # Can choose between light, medium, and heavy optimization runs
)

# Optimize program
print("Optimizing program with MIPROv2...")
gsm8k = GSM8K()
optimized_program = teleprompter.compile(
    dspy.ChainOfThought("question -> answer"),
    trainset=gsm8k.train,
)

# Save optimized program for future use
optimized_program.save("optimized.json")
```

## How `MIPROv2` works

At a high level, `MIPROv2` works by creating both few-shot examples and new instructions for each predictor in your LM program, and then searching over these using Bayesian Optimization to find the best combination of these variables for your program. If you want a visual explanation, check out this [twitter thread](https://x.com/michaelryan207/status/1804189184988713065). These steps are broken down in more detail below:

1) **Bootstrap Few-Shot Examples**: Randomly samples examples from your training set and runs them through your LM program. If the output from the program is correct for this example, it is kept as a valid few-shot example candidate. Otherwise, we try another example until we've curated the specified number of few-shot example candidates. This step creates `num_candidates` sets of `max_bootstrapped_demos` bootstrapped examples and `max_labeled_demos` basic examples sampled from the training set.

2) **Propose Instruction Candidates**: The instruction proposer is given (1) a generated summary of properties of the training dataset, (2) a generated summary of your LM program's code and the specific predictor that an instruction is being generated for, (3) the previously bootstrapped few-shot examples, which show reference inputs/outputs for the given predictor, and (4) a randomly sampled tip for generation (e.g., "be creative", "be concise") to help explore the feature space of potential instructions. This context is provided to a `prompt_model`, which writes high-quality instruction candidates.

3) **Find an Optimized Combination of Few-Shot Examples & Instructions**: Finally, we use Bayesian Optimization to choose which combinations of instructions and demonstrations work best for each predictor in our program. This works by running a series of `num_trials` trials, where a new set of prompts is evaluated over our validation set at each trial. The new set of prompts is only evaluated on a minibatch of size `minibatch_size` at each trial (when `minibatch=True`). The set of prompts with the best average score is then evaluated on the full validation set every `minibatch_full_eval_steps` trials. At the end of the optimization process, the LM program with the set of prompts that performed best on the full validation set is returned.

For those interested in more details, more information on `MIPROv2`, along with a study comparing `MIPROv2` with other DSPy optimizers, can be found in [this paper](https://arxiv.org/abs/2406.11695).
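For the 0-shot (instruction-only) setting mentioned in the introduction, the sketch below disables few-shot demos entirely. Treat the `max_bootstrapped_demos` / `max_labeled_demos` overrides on `compile` as an assumption to verify against the API reference above for your DSPy version:

```python
import dspy
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric
from dspy.teleprompt import MIPROv2

dspy.configure(lm=dspy.LM('openai/gpt-4o-mini', api_key='YOUR_OPENAI_API_KEY'))

# Optimize instructions only: no few-shot demonstrations are attached to the prompts.
teleprompter = MIPROv2(metric=gsm8k_metric, auto="light")
zeroshot_program = teleprompter.compile(
    dspy.ChainOfThought("question -> answer"),
    trainset=GSM8K().train,
    max_bootstrapped_demos=0,
    max_labeled_demos=0,
)
zeroshot_program.save("optimized_zeroshot.json")
```

With both demo counts set to zero, the resulting prompts contain no few-shot examples, so the search effectively explores instruction candidates alone.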
``` -------------------------------------------------------------------------------- /tests/utils/test_mcp.py: -------------------------------------------------------------------------------- ```python import asyncio import importlib import pytest from dspy.utils.mcp import convert_mcp_tool if importlib.util.find_spec("mcp") is None: pytest.skip(reason="mcp is not installed", allow_module_level=True) @pytest.mark.asyncio @pytest.mark.extra async def test_convert_mcp_tool(): from mcp import ClientSession, StdioServerParameters from mcp.client.stdio import stdio_client server_params = StdioServerParameters( command="python", args=["tests/utils/resources/mcp_server.py"], env=None, ) async with stdio_client(server_params) as (read, write): async with ClientSession(read, write) as session: await asyncio.wait_for(session.initialize(), timeout=5) response = await session.list_tools() # Check add add_tool = convert_mcp_tool(session, response.tools[0]) assert add_tool.name == "add" assert add_tool.desc == "Add two numbers" assert add_tool.args == {"a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "integer"}} assert add_tool.arg_types == {"a": int, "b": int} assert add_tool.arg_desc == { "a": "No description provided. (Required)", "b": "No description provided. (Required)", } assert await add_tool.acall(a=1, b=2) == "3" # Check hello hello_tool = convert_mcp_tool(session, response.tools[1]) assert hello_tool.name == "hello" assert hello_tool.desc == "Greet people" assert hello_tool.args == {"names": {"title": "Names", "type": "array", "items": {"type": "string"}}} assert hello_tool.arg_types == {"names": list} assert hello_tool.arg_desc == {"names": "No description provided. (Required)"} assert await hello_tool.acall(names=["Bob", "Tom"]) == ["Hello, Bob!", "Hello, Tom!"] # Check error handling error_tool = convert_mcp_tool(session, response.tools[2]) assert error_tool.name == "wrong_tool" assert error_tool.desc == "This tool raises an error" with pytest.raises( RuntimeError, match="Failed to call a MCP tool: Error executing tool wrong_tool: error!" 
            ):
                await error_tool.acall()

            # Check nested Pydantic arg
            nested_pydantic_tool = convert_mcp_tool(session, response.tools[3])
            assert nested_pydantic_tool.name == "get_account_name"
            assert nested_pydantic_tool.desc == "This extracts the name from account"
            assert nested_pydantic_tool.args == {
                "account": {
                    "title": "Account",
                    "type": "object",
                    "required": ["profile", "account_id"],
                    "properties": {
                        "profile": {
                            "title": "Profile",
                            "type": "object",
                            "properties": {
                                "name": {"title": "Name", "type": "string"},
                                "age": {"title": "Age", "type": "integer"},
                            },
                            "required": ["name", "age"],
                        },
                        "account_id": {"title": "Account Id", "type": "string"},
                    },
                }
            }
            account_in_json = {
                "profile": {
                    "name": "Bob",
                    "age": 20,
                },
                "account_id": "123",
            }
            result = await nested_pydantic_tool.acall(account=account_in_json)
            assert result == "Bob"

            # Check no input parameter current_datetime tool
            current_datetime_tool = convert_mcp_tool(session, response.tools[4])
            assert current_datetime_tool.name == "current_datetime"
            assert current_datetime_tool.desc == "Get the current datetime"
            assert current_datetime_tool.args == {}
            assert current_datetime_tool.arg_types == {}
            assert current_datetime_tool.arg_desc == {}
            assert await current_datetime_tool.acall() == "2025-07-23T09:10:10.0+00:00"
```

--------------------------------------------------------------------------------
/docs/docs/tutorials/async/index.md:
--------------------------------------------------------------------------------

```markdown
# Async DSPy Programming

DSPy provides native support for asynchronous programming, allowing you to build more efficient and scalable applications. This guide will walk you through how to leverage async capabilities in DSPy, covering both built-in modules and custom implementations.

## Why Use Async in DSPy?

Asynchronous programming in DSPy offers several benefits:

- Improved performance through concurrent operations
- Better resource utilization
- Reduced waiting time for I/O-bound operations
- Enhanced scalability for handling multiple requests

## When Should I use Sync or Async?

Choosing between synchronous and asynchronous programming in DSPy depends on your specific use case. Here's a guide to help you make the right choice:

Use Synchronous Programming When:

- You're exploring or prototyping new ideas
- You're conducting research or experiments
- You're building small to medium-sized applications
- You need simpler, more straightforward code
- You want easier debugging and error tracking

Use Asynchronous Programming When:

- You're building a high-throughput service (high QPS)
- You're working with tools that only support async operations
- You need to handle multiple concurrent requests efficiently
- You're building a production service that requires high scalability

### Important Considerations

While async programming offers performance benefits, it comes with some trade-offs:

- More complex error handling and debugging
- Potential for subtle, hard-to-track bugs
- More complex code structure
- Different code paths between IPython environments (Colab, Jupyter Lab, Databricks notebooks, ...) and the normal Python runtime

We recommend starting with synchronous programming for most development scenarios and switching to async only when you have a clear need for its benefits. This approach allows you to focus on the core logic of your application before dealing with the additional complexity of async programming.

## Using Built-in Modules Asynchronously

Most DSPy built-in modules support asynchronous operations through the `acall()` method. This method maintains the same interface as the synchronous `__call__` method but operates asynchronously.

Here's a basic example using `dspy.Predict`:

```python
import dspy
import asyncio
import os

os.environ["OPENAI_API_KEY"] = "your_api_key"
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))
predict = dspy.Predict("question->answer")

async def main():
    # Use acall() for async execution
    output = await predict.acall(question="why did a chicken cross the kitchen?")
    print(output)

asyncio.run(main())
```

### Working with Async Tools

DSPy's `Tool` class seamlessly integrates with async functions. When you provide an async function to `dspy.Tool`, you can execute it using `acall()`. This is particularly useful for I/O-bound operations or when working with external services.

```python
import asyncio
import dspy
import os

os.environ["OPENAI_API_KEY"] = "your_api_key"

async def foo(x):
    # Simulate an async operation
    await asyncio.sleep(0.1)
    print(f"I get: {x}")

# Create a tool from the async function
tool = dspy.Tool(foo)

async def main():
    # Execute the tool asynchronously
    await tool.acall(x=2)

asyncio.run(main())
```

Note: When using `dspy.ReAct` with tools, calling `acall()` on the ReAct instance will automatically execute all tools asynchronously using their `acall()` methods, as sketched below.
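Here is a minimal sketch of that pattern: a ReAct agent backed by an async tool. The `search_docs` function, its return value, and the question are hypothetical and exist only for illustration; the point is that awaiting `react.acall()` drives the async tool through its own `acall()` as the agent loops.

```python
import asyncio
import os

import dspy

os.environ["OPENAI_API_KEY"] = "your_api_key"
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))

# A hypothetical async tool; in practice this might query a database or an HTTP API.
async def search_docs(query: str) -> str:
    """Search the documentation for a query."""
    await asyncio.sleep(0.1)  # simulate I/O latency
    return f"No results found for: {query}"

react = dspy.ReAct("question -> answer", tools=[dspy.Tool(search_docs)])

async def main():
    # acall() on the ReAct agent awaits each tool's acall() during the agent loop
    result = await react.acall(question="Where is async covered in the DSPy docs?")
    print(result.answer)

asyncio.run(main())
```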
## Creating Custom Async DSPy Modules

To create your own async DSPy module, implement the `aforward()` method instead of `forward()`. This method should contain your module's async logic. Here's an example of a custom module that chains two async operations:

```python
import dspy
import asyncio
import os

os.environ["OPENAI_API_KEY"] = "your_api_key"
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))

class MyModule(dspy.Module):
    def __init__(self):
        self.predict1 = dspy.ChainOfThought("question->answer")
        self.predict2 = dspy.ChainOfThought("answer->simplified_answer")

    async def aforward(self, question, **kwargs):
        # Execute predictions sequentially but asynchronously,
        # feeding the first prediction's answer field into the second predictor
        answer = await self.predict1.acall(question=question)
        return await self.predict2.acall(answer=answer.answer)

async def main():
    mod = MyModule()
    result = await mod.acall(question="Why did a chicken cross the kitchen?")
    print(result)

asyncio.run(main())
```
```

--------------------------------------------------------------------------------
/tests/reliability/complex_types/generated/test_many_types_1/schema.json:
--------------------------------------------------------------------------------

```json
{ "description": "The program is designed to process various data types including tuples, enums, datetime values, literals, objects, and nested objects containing these types. The program will accept inputs of these types, perform specified operations on them, and return the results.
The operations could include validation, transformation, and extraction of information from these inputs.", "properties": { "datetimeField": { "desc": null, "format": "date-time", "prefix": "Datetime Field:", "type": "string" }, "enumField": { "enum": ["option1", "option2", "option3"], "type": "string" }, "literalField": { "const": "literalValue", "enum": ["literalValue"], "type": "string" }, "nestedObjectField": { "properties": { "datetimeField": { "format": "date-time", "type": "string" }, "enumField": { "enum": ["option1", "option2", "option3"], "type": "string" }, "literalField": { "const": "literalValue", "enum": ["literalValue"], "type": "string" }, "tupleField": { "items": { "anyOf": [ { "type": "string" }, { "type": "number" } ] }, "maxItems": 2, "minItems": 2, "type": "array" } }, "required": ["tupleField", "enumField", "datetimeField", "literalField"], "type": "object" }, "objectField": { "properties": { "subField1": { "type": "string" }, "subField2": { "type": "number" } }, "required": ["subField1", "subField2"], "type": "object" }, "processedDatetimeField": { "desc": null, "format": "date-time", "prefix": "Processed Datetime Field:", "type": "string" }, "processedEnumField": { "enum": ["option1", "option2", "option3"], "type": "string" }, "processedLiteralField": { "const": "literalValue", "enum": ["literalValue"], "type": "string" }, "processedNestedObjectField": { "properties": { "additionalField": { "type": "boolean" }, "datetimeField": { "format": "date-time", "type": "string" }, "enumField": { "enum": ["option1", "option2", "option3"], "type": "string" }, "literalField": { "const": "literalValue", "enum": ["literalValue"], "type": "string" }, "tupleField": { "items": { "anyOf": [ { "type": "string" }, { "type": "number" } ] }, "maxItems": 2, "minItems": 2, "type": "array" } }, "required": [ "tupleField", "enumField", "datetimeField", "literalField", "additionalField" ], "type": "object" }, "processedObjectField": { "properties": { "additionalField": { "type": "boolean" }, "subField1": { "type": "string" }, "subField2": { "type": "number" } }, "required": ["subField1", "subField2", "additionalField"], "type": "object" }, "processedTupleField": { "desc": null, "items": { "anyOf": [ { "type": "string" }, { "type": "number" } ] }, "prefix": "Processed Tuple Field:", "type": "array" }, "tupleField": { "desc": null, "items": { "anyOf": [ { "type": "string" }, { "type": "number" } ] }, "prefix": "Tuple Field:", "type": "array" } }, "required": [ "tupleField", "enumField", "datetimeField", "literalField", "objectField", "nestedObjectField", "processedTupleField", "processedEnumField", "processedDatetimeField", "processedLiteralField", "processedObjectField", "processedNestedObjectField" ], "type": "object" } ```