This is page 10 of 12. Use http://codebase.md/getzep/graphiti?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .env.example
├── .github
│ ├── dependabot.yml
│ ├── ISSUE_TEMPLATE
│ │ └── bug_report.md
│ ├── pull_request_template.md
│ ├── secret_scanning.yml
│ └── workflows
│ ├── ai-moderator.yml
│ ├── cla.yml
│ ├── claude-code-review-manual.yml
│ ├── claude-code-review.yml
│ ├── claude.yml
│ ├── codeql.yml
│ ├── lint.yml
│ ├── release-graphiti-core.yml
│ ├── release-mcp-server.yml
│ ├── release-server-container.yml
│ ├── typecheck.yml
│ └── unit_tests.yml
├── .gitignore
├── AGENTS.md
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── conftest.py
├── CONTRIBUTING.md
├── depot.json
├── docker-compose.test.yml
├── docker-compose.yml
├── Dockerfile
├── ellipsis.yaml
├── examples
│ ├── azure-openai
│ │ ├── .env.example
│ │ ├── azure_openai_neo4j.py
│ │ └── README.md
│ ├── data
│ │ └── manybirds_products.json
│ ├── ecommerce
│ │ ├── runner.ipynb
│ │ └── runner.py
│ ├── langgraph-agent
│ │ ├── agent.ipynb
│ │ └── tinybirds-jess.png
│ ├── opentelemetry
│ │ ├── .env.example
│ │ ├── otel_stdout_example.py
│ │ ├── pyproject.toml
│ │ ├── README.md
│ │ └── uv.lock
│ ├── podcast
│ │ ├── podcast_runner.py
│ │ ├── podcast_transcript.txt
│ │ └── transcript_parser.py
│ ├── quickstart
│ │ ├── dense_vs_normal_ingestion.py
│ │ ├── quickstart_falkordb.py
│ │ ├── quickstart_neo4j.py
│ │ ├── quickstart_neptune.py
│ │ ├── README.md
│ │ └── requirements.txt
│ └── wizard_of_oz
│ ├── parser.py
│ ├── runner.py
│ └── woo.txt
├── graphiti_core
│ ├── __init__.py
│ ├── cross_encoder
│ │ ├── __init__.py
│ │ ├── bge_reranker_client.py
│ │ ├── client.py
│ │ ├── gemini_reranker_client.py
│ │ └── openai_reranker_client.py
│ ├── decorators.py
│ ├── driver
│ │ ├── __init__.py
│ │ ├── driver.py
│ │ ├── falkordb_driver.py
│ │ ├── graph_operations
│ │ │ └── graph_operations.py
│ │ ├── kuzu_driver.py
│ │ ├── neo4j_driver.py
│ │ ├── neptune_driver.py
│ │ └── search_interface
│ │ └── search_interface.py
│ ├── edges.py
│ ├── embedder
│ │ ├── __init__.py
│ │ ├── azure_openai.py
│ │ ├── client.py
│ │ ├── gemini.py
│ │ ├── openai.py
│ │ └── voyage.py
│ ├── errors.py
│ ├── graph_queries.py
│ ├── graphiti_types.py
│ ├── graphiti.py
│ ├── helpers.py
│ ├── llm_client
│ │ ├── __init__.py
│ │ ├── anthropic_client.py
│ │ ├── azure_openai_client.py
│ │ ├── client.py
│ │ ├── config.py
│ │ ├── errors.py
│ │ ├── gemini_client.py
│ │ ├── groq_client.py
│ │ ├── openai_base_client.py
│ │ ├── openai_client.py
│ │ ├── openai_generic_client.py
│ │ └── utils.py
│ ├── migrations
│ │ └── __init__.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── edges
│ │ │ ├── __init__.py
│ │ │ └── edge_db_queries.py
│ │ └── nodes
│ │ ├── __init__.py
│ │ └── node_db_queries.py
│ ├── nodes.py
│ ├── prompts
│ │ ├── __init__.py
│ │ ├── dedupe_edges.py
│ │ ├── dedupe_nodes.py
│ │ ├── eval.py
│ │ ├── extract_edge_dates.py
│ │ ├── extract_edges.py
│ │ ├── extract_nodes.py
│ │ ├── invalidate_edges.py
│ │ ├── lib.py
│ │ ├── models.py
│ │ ├── prompt_helpers.py
│ │ ├── snippets.py
│ │ └── summarize_nodes.py
│ ├── py.typed
│ ├── search
│ │ ├── __init__.py
│ │ ├── search_config_recipes.py
│ │ ├── search_config.py
│ │ ├── search_filters.py
│ │ ├── search_helpers.py
│ │ ├── search_utils.py
│ │ └── search.py
│ ├── telemetry
│ │ ├── __init__.py
│ │ └── telemetry.py
│ ├── tracer.py
│ └── utils
│ ├── __init__.py
│ ├── bulk_utils.py
│ ├── content_chunking.py
│ ├── datetime_utils.py
│ ├── maintenance
│ │ ├── __init__.py
│ │ ├── community_operations.py
│ │ ├── dedup_helpers.py
│ │ ├── edge_operations.py
│ │ ├── graph_data_operations.py
│ │ ├── node_operations.py
│ │ └── temporal_operations.py
│ ├── ontology_utils
│ │ └── entity_types_utils.py
│ └── text_utils.py
├── images
│ ├── arxiv-screenshot.png
│ ├── graphiti-graph-intro.gif
│ ├── graphiti-intro-slides-stock-2.gif
│ └── simple_graph.svg
├── LICENSE
├── Makefile
├── mcp_server
│ ├── .env.example
│ ├── .python-version
│ ├── config
│ │ ├── config-docker-falkordb-combined.yaml
│ │ ├── config-docker-falkordb.yaml
│ │ ├── config-docker-neo4j.yaml
│ │ ├── config.yaml
│ │ └── mcp_config_stdio_example.json
│ ├── docker
│ │ ├── build-standalone.sh
│ │ ├── build-with-version.sh
│ │ ├── docker-compose-falkordb.yml
│ │ ├── docker-compose-neo4j.yml
│ │ ├── docker-compose.yml
│ │ ├── Dockerfile
│ │ ├── Dockerfile.standalone
│ │ ├── github-actions-example.yml
│ │ ├── README-falkordb-combined.md
│ │ └── README.md
│ ├── docs
│ │ └── cursor_rules.md
│ ├── main.py
│ ├── pyproject.toml
│ ├── pytest.ini
│ ├── README.md
│ ├── src
│ │ ├── __init__.py
│ │ ├── config
│ │ │ ├── __init__.py
│ │ │ └── schema.py
│ │ ├── graphiti_mcp_server.py
│ │ ├── models
│ │ │ ├── __init__.py
│ │ │ ├── entity_types.py
│ │ │ └── response_types.py
│ │ ├── services
│ │ │ ├── __init__.py
│ │ │ ├── factories.py
│ │ │ └── queue_service.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── formatting.py
│ │ └── utils.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── pytest.ini
│ │ ├── README.md
│ │ ├── run_tests.py
│ │ ├── test_async_operations.py
│ │ ├── test_comprehensive_integration.py
│ │ ├── test_configuration.py
│ │ ├── test_falkordb_integration.py
│ │ ├── test_fixtures.py
│ │ ├── test_http_integration.py
│ │ ├── test_integration.py
│ │ ├── test_mcp_integration.py
│ │ ├── test_mcp_transports.py
│ │ ├── test_stdio_simple.py
│ │ └── test_stress_load.py
│ └── uv.lock
├── OTEL_TRACING.md
├── py.typed
├── pyproject.toml
├── pytest.ini
├── README.md
├── SECURITY.md
├── server
│ ├── .env.example
│ ├── graph_service
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── dto
│ │ │ ├── __init__.py
│ │ │ ├── common.py
│ │ │ ├── ingest.py
│ │ │ └── retrieve.py
│ │ ├── main.py
│ │ ├── routers
│ │ │ ├── __init__.py
│ │ │ ├── ingest.py
│ │ │ └── retrieve.py
│ │ └── zep_graphiti.py
│ ├── Makefile
│ ├── pyproject.toml
│ ├── README.md
│ └── uv.lock
├── signatures
│ └── version1
│ └── cla.json
├── tests
│ ├── cross_encoder
│ │ ├── test_bge_reranker_client_int.py
│ │ └── test_gemini_reranker_client.py
│ ├── driver
│ │ ├── __init__.py
│ │ └── test_falkordb_driver.py
│ ├── embedder
│ │ ├── embedder_fixtures.py
│ │ ├── test_gemini.py
│ │ ├── test_openai.py
│ │ └── test_voyage.py
│ ├── evals
│ │ ├── data
│ │ │ └── longmemeval_data
│ │ │ ├── longmemeval_oracle.json
│ │ │ └── README.md
│ │ ├── eval_cli.py
│ │ ├── eval_e2e_graph_building.py
│ │ ├── pytest.ini
│ │ └── utils.py
│ ├── helpers_test.py
│ ├── llm_client
│ │ ├── test_anthropic_client_int.py
│ │ ├── test_anthropic_client.py
│ │ ├── test_azure_openai_client.py
│ │ ├── test_client.py
│ │ ├── test_errors.py
│ │ └── test_gemini_client.py
│ ├── test_edge_int.py
│ ├── test_entity_exclusion_int.py
│ ├── test_graphiti_int.py
│ ├── test_graphiti_mock.py
│ ├── test_node_int.py
│ ├── test_text_utils.py
│ └── utils
│ ├── maintenance
│ │ ├── test_bulk_utils.py
│ │ ├── test_edge_operations.py
│ │ ├── test_entity_extraction.py
│ │ ├── test_node_operations.py
│ │ └── test_temporal_operations_int.py
│ ├── search
│ │ └── search_utils_test.py
│ └── test_content_chunking.py
├── uv.lock
└── Zep-CLA.md
```
# Files
--------------------------------------------------------------------------------
/examples/podcast/podcast_transcript.txt:
--------------------------------------------------------------------------------
```
1 | 0 (3s):
2 | So let's talk a little bit about what you see as the purpose of college. I've heard you say that some people use it for chasing status was your phrase, while others use it to prepare themselves to improve not just themselves and their families, but society. So what do you see as the mission?
3 |
4 | 1 (23s):
5 | Well, part of the ethos of Jesuit institutions from the beginning is that we want our students to learn and get all the tools they need to flourish. And we wanna give them opportunity, but we also want them to have all of that, not just for them, but for the world. That we have this enormous force multiplier of sending them out with the desire to matter and the skills to really do that. And they will choose how, but we really need for them to understand that the saccharine high of just getting the job that pays the most or seeking status for themselves, that's not what will make them happy, and that is not the point of their lives. And so they can do that and still be happy.
6 |
7 | 1 (1m 3s):
8 | But what really drives you is knowing, looking back on your deathbed at your life. How did I matter?
9 |
10 | 0 (1m 11s):
11 | I'd like to introduce our guest for today,
12 |
13 | 1 (1m 13s):
14 | Tania Tetlow, president of Fordham University.
15 |
16 | 0 (1m 17s):
17 | Fordham is a well-regarded private university in New York City, founded in 1841 and run for most of its history by the Jesuits, the Roman Catholic religious order that dates to the 16th century. Tetlow is the first female president of Fordham, as well as the first layperson.
18 |
19 | 1 (1m 34s):
20 | There's a very daunting hall of portraits outside of my office. You know, all of these priests going back to 1841,
21 |
22 | 0 (1m 41s):
 23 | Tetlow's own father was in fact a priest. But while getting his psychology PhD at Fordham, he met his would-be wife, another graduate student, so he left the priesthood. Tania was born in New York not long before the family moved to New Orleans, so Fordham is in her genes.
24 |
25 | 1 (2m 0s):
26 | A good way to recruit me is they can tell me you exist because of us.
27 |
28 | 0 (2m 4s):
 29 | Fordham did recruit her and she returned as president in 2022. Before that, Tetlow was president of Loyola University in New Orleans, another Jesuit school, one of 27 in the US, and about 130 globally. The Jesuits have always been big on educating as well as evangelizing. Tetlow is a lawyer by training and taught law for a while at Tulane. And before that she was a federal prosecutor in New Orleans. What does it say about the state of higher education that Fordham chose as its president not only a non-priest, but a former prosecutor?
30 |
31 | 1 (2m 44s):
32 | We spent our time, all of us in these jobs playing defense and navigating crises. Everything from the protest movements to efforts from those who work here to make sure that they're paid well and fairly and how to balance that against remaining affordable to students and bridging that gap just gets harder and harder
33 |
34 | 0 (3m 6s):
35 | Today on Freakonomics. Radio. Another conversation in our ongoing look at what college is really for. With higher ed under attack from multiple angles, Tetlow sees an urgency in turning things around
36 |
37 | 1 (3m 20s):
38 | The countries against whom the US competes. None of them are disinvesting from education right now.
39 |
40 | 0 (3m 26s):
41 | We talk about the difference between religious and secular universities.
42 |
43 | 1 (3m 30s):
44 | I don't have to be afraid to talk about values in my out loud voice.
45 |
46 | 0 (3m 34s):
47 | And we talk about why despite all the trouble and controversy, the enterprise is worth defending.
48 |
49 | 1 (3m 41s):
50 | If you want a great city, build a university and wait 200 years.
51 |
52 | 4 (3m 59s):
53 | This is Freakonomics Radio, the podcast that explores the hidden side of everything with your host Steven Dubner. Woo,
54 |
55 | 0 (4m 15s):
 56 | Kamala Harris, before serving as Vice President and US Senator, was a prosecutor, the district attorney for San Francisco and the California Attorney General. Now that she's running for President, Harris is leaning into her experience as a prosecutor.
57 |
58 | 5 (4m 33s):
59 | So in those roles, I took on perpetrators of all kinds. So hear me when I say I know Donald Trump's type.
60 |
61 | 1 (4m 47s):
62 | As a fellow former prosecutor, I really admire that background in her.
63 |
64 | 0 (4m 52s):
65 | Can you imagine ways in which that background can be useful as perhaps president of the United States?
66 |
67 | 1 (4m 59s):
68 | Well, in a funny way, you have such ultimate power as a prosecutor over your one single case. I found that really good preparation for having power in other settings.
69 |
70 | 0 (5m 13s):
71 | What did you learn from being a prosecutor that helps you in your role as a college president?
72 |
73 | 1 (5m 18s):
74 | It's the only kind of lawyer where your ethical duty is not to represent a client but to do justice. That is what you're charged with. And so I spent as much time talking to witnesses or defendants who are cooperating about how they ended up there and what their lives were like, and really learning who they were as people in ways that I don't know is typical of people in that job. But I really loved,
75 |
76 | 0 (5m 40s):
77 | Tell me maybe your most memorable case.
78 |
79 | 1 (5m 43s):
80 | I had a case where a high school teacher helped an old buddy who was in prison collect some packages.
81 |
82 | 0 (5m 54s):
83 | This isn't gonna end well. No.
84 |
85 | 1 (5m 57s):
 86 | And it was just one of the most fascinating cases about human beings and how we delude ourselves. A high school teacher whose old buddy from high school, the popular kid who would never talk to him in high school, finally reached out from prison to see if they could be friends. And he, out of so many high school drama kind of psychology, decided, oh, sure, I will accept these packages coming in the mail without knowing what they are. And got dragged into this whole drug scheme. So the teacher who got dragged into it cooperated, no one else would've been brave enough to do it because he was up against the major kingpins.
87 |
88 | 0 (6m 33s):
89 | He's your witness then
90 |
91 | 1 (6m 34s):
92 | He's my witness. And we were going against the person who was running a heroin scheme from jail. But it took a long time to just get him to admit his real emotions rather than have bravado on the stand. I finally, after berating him and prep got him to admit I was afraid.
93 |
94 | 0 (6m 52s):
95 | I mean, I don't blame him. Did you win that case? Yes. So when I think of the Jesuit tradition, I think of inquiry and intellectualism and I think especially of the concept of discernment, which I gather is very important within the tradition. And it, it strikes me that discernment is fairly absent these days, at least in the public square. And that's one reason I wanted to speak with you today because I figured you could teach me and all of us a little bit about how to get in touch with that, maybe apply it. So I'd like you to define discernment as you see it and describe how you try to spread that as a president of a Jesuit university,
96 |
97 | 1 (7m 35s):
98 | It is basically the opposite of social media in shorthand. So discernment means to take time to consider a big decision and not to jump to conclusions. It means being open and curious. It means assuming good intentions of the person you're disagreeing with, which we are all very bad at right now. And it means being self-aware enough of your own biases and filters that you realize what will prevent you from seeing the truth. And right now, I think we're all feeling the pressure to teach those skills to our students, especially this fall as we approach the election and all the turmoil that society's going through.
99 |
100 | 1 (8m 19s):
101 | How do we double down on teaching those skills when they have become so countercultural?
102 |
103 | 0 (8m 23s):
104 | Yeah, but I would imagine that you are recruiting for students who already buy into the notion of discernment. No,
105 |
106 | 1 (8m 30s):
107 | It's chicken and egg, right? The students who are attracted to us tend to have this sense of purpose, and I will say the two Jesuit institutions I've led have student communities who don't lean into self-righteousness in quite the same way that young people are tempted by right now.
108 |
109 | 0 (8m 47s):
110 | What do you think would happen if you could play some version of Freaky Friday and bring the entire educational architecture of Fordham to a place like Harvard or Penn for a week and apply all the layers of discernment in education there? How would that go over with those student bodies do you think? Well,
111 |
112 | 1 (9m 10s):
113 | There is a freedom I find in being in a religious institution where I don't have to be afraid to talk about values in my out loud voice in quite the same way that in a secular institution we were just so afraid of offending by having any reference to religion at all.
114 |
115 | 0 (9m 28s):
116 | Can you give an example of some kind of conversation you might've liked to have at Tulane where you felt it wouldn't be accepted?
117 |
118 | 1 (9m 38s):
119 | When we would talk about diversity there, we were left to some of the more tepid values of hospitality and welcome. And when I talk about it at a Jesuit institution, I'm able to really lean into the fact that our faith believes profoundly in the equality and human dignity of every single person, that we believe that we owe people more when they need more.
120 |
121 | 0 (10m 5s):
122 | Pope Francis, who's the first Jesuit pope, has said that some universities I know in America are too liberal and he accused them of training technicians and specialists instead of whole people. I'm curious for your take on that.
123 |
124 | 1 (10m 18s):
125 | Well, it's interesting because this parallel attack in this country on the value of liberal arts, and for us as Catholic institutions, we cling to our core curriculums fiercely in this country. It's not really a liberal problem, it's more from the other side, this mocking of English majors as if much of the powerhouse of this country didn't major in English, right? And when we talk to employers, they're desperate for us to teach those kind of emotional intelligence, communication, critical thinking skills that you learn in philosophy in English and all of those kinds of courses because that's really hard to teach on the job. They can teach technical skills on the job, and frankly, the technical skills we teach are often defunct by the time the kids graduate.
126 |
127 | 1 (11m 6s):
128 | Right? Those change too much.
129 |
130 | 0 (11m 9s):
131 | So Fordham is a Catholic university, but the share of students who describe themselves as Catholic surprised me. Can you talk about that?
132 |
133 | 1 (11m 17s):
134 | It's about 40%. We became religiously plural in a way that's kind of a hidden story of American higher ed. Catholic students were not always welcome in the first half of the 20th century and before at elite institutions, which we sometimes forget, were founded as Protestant institutions and had attitudes towards really immigrants, Irish, Italians, others coming in off the ships and not wanting them there in the same way they created quotas and caps for Jewish students. And so Catholic schools when they were founded were full of Catholics who did not have other options. And we welcome Jewish students who often did not have other options. When those doors opened, we had some of the same dilemmas of women's colleges and HBCUs of what do we do?
135 |
136 | 1 (12m 3s):
137 | And so we very much welcome students of all faiths and it changed who we are. We became very ecumenical. But now far more of our student body is just secular. They were raised with no religious tradition whatsoever.
138 |
139 | 0 (12m 17s):
140 | When I look at the student population at Fordham, I see that it's got about 40% of what are called underrepresented populations, 17% Hispanic Latino, 13% Asian, 5.5% black. It strikes me that you are significantly more diverse than a lot of the very liberal schools that talk about diversity a lot. How does that happen?
141 |
142 | 1 (12m 41s):
143 | Well, partly success begets success. To come to a school that is already diverse means you have strength in numbers where you won't be alone. and I think it really helps to be in New York a place that is already so diverse. We get to recruit in our backyard, we get to attract people to a city that has everyone in the world here.
144 |
145 | 0 (13m 2s):
146 | I'm curious how the Jesuit tradition and Catholicism generally intersect with the politics of this moment. Many of my Catholic friends and family members are really torn because they don't like Donald Trump as a person or a candidate for a variety of reasons. But they do really like the fact that he's created a Supreme court that has put much stricter limits on abortion. And I'm curious how that plays out at Fordham.
147 |
148 | 1 (13m 29s):
149 | Well, Catholic doctrine does not neatly fit in either political party because in many ways it's the opposite of libertarianism, which also doesn't neatly fit in either party. So you know, Catholic teaching would be somewhat more conservative, restrictive on social issues, but far more Progressive on economic issues than the Republican party. Right? Catholic social teaching to many more conservative Catholics seems incredibly radical, but it is in fact the doctrine we've had for a very long time and the church, and it's pretty clearly what's in the gospels.
150 |
151 | 0 (14m 1s):
152 | Give an example of that for those who don't know.
153 |
154 | 1 (14m 4s):
155 | You know, the Catholic Church believes profoundly in caring for the poor is a priority of caring about the right to organize labor, racial justice, all of those kinds of issues that don't neatly fit with a Republican party that does care about restricting abortion and other things. In American society, we've always had a balance that was critical between individual rights and a sense of community and responsibility. That balance is really out of whack right now. We've leaned so heavily into individual rights, which are crucial, but if they're unmoored from the idea of community of what we owe each other, they're really quite dangerous if we're all in it for ourselves, Who, Are, We.
156 |
157 | 1 (14m 48s):
158 | And so what Catholic teachings really offer is a reminder that we do have to care about community. That we have not just rights, but responsibilities
159 |
160 | 0 (14m 58s):
161 | After the break. The friction between rights and responsibilities and how it played out at Fordham this past spring.
162 |
163 | 1 (15m 4s):
164 | You don't point bullhorns at the library during study session.
165 |
166 | 0 (15m 7s):
167 | I'm Steven Dubner, you're listening to Freakonomics Radio. We will be right back As president of Fordham University. Tania Tetlow oversees roughly 17,000 students and 750 faculty. The biggest majors are in finance, psychology, and government. Fordham also has several prestigious graduate programs in business and law education and social work, and even some theology still. The school is split between two main campuses, both in New York City, one in the Rose Hills section of the Bronx, the other at Lincoln Center in Manhattan.
168 |
169 | 0 (15m 48s):
170 | Those two campuses are about nine miles apart. If you walked from one Fordham campus to the other, you would pass right through Columbia University. This past spring as pro-Palestinian demonstrators set up encampments at many schools. Columbia had some of the most intense protests, which led to more than a hundred arrests. So what was happening at Fordham, I asked Tetlow to describe it.
171 |
172 | 1 (16m 14s):
173 | We have students who are from Palestine who are very worried about parents and grandparents they can't get in touch with. They're going through all the stages of grief and trauma, and they've been extraordinary. And I've also felt, you know, if yelling at me will make you feel better for even half a minute, go for it. It is my honor, because they're feeling so powerless. We also have members of our community who are Jewish and Israeli and who lost family members on October 7th. And so it made me realize how close New York is to the Middle East and of how profound that pain is for part of our community.
174 |
175 | 1 (16m 57s):
176 | And so what was really impressive this year is student activists did prayer vigils and they did teach-ins and they talked and they listened and they engaged with complexity and they really tried to do the work of expressing outrage at that which they're outraged by, but without just yelling at the nearest authority figure or trying to disrupt the right of their fellow students to learn. That got ratcheted up when the clearing out of Hamilton Hall at Columbia happened
177 |
178 | 0 (17m 29s):
179 | By the police. We should say
180 |
181 | 1 (17m 31s):
182 | By the police. Yeah. And so the next morning students who told us later were really upset by that came and started a little encampment in a classroom building in our Manhattan campus. We persuaded most of them to leave, but we did end up having the police arrest on minor misdemeanors, about 15 mostly students. So that was painful because you know, how do you navigate the rights of our 17,000 students to learn on the cusp of finals with the rights of those dozen students to express themselves and to protest? And it was really hard.
183 |
184 | 0 (18m 8s):
185 | And what happened then? Did it deescalate after those arrests? Yes. I've read that when you were a kid, your father who was a psychologist and professor and also counseled prisoners that he had a sign on his desk that said question authority, but politely and with respect. How do you feel that slogan relates to, let's say, the campus politics around this particular issue at Fordham? Was authority questioned politely with respect and fruitfully or not really? I think
186 |
187 | 1 (18m 42s):
188 | For the most part it was, we met with student activists and they have been profound and persuasive and respectful and thus very effective, right? Going to people and saying, I think that you are an evil, awful person and I'm gonna scream at you until you agree with me doesn't work. It feels good. It's venting, but it is not the same as activism. We have always authorized any request to protest on our campus that students bring us. We're at a hundred percent with that. But what we navigate with them is, you know, you don't point bullhorns at the library during study session. You find ways to make your ability to express yourself, not have to disrupt the education of your fellow students.
189 |
190 | 1 (19m 23s):
191 | And so when we think about those restrictions, we need to think about them both for protests we agree with and those we don't. You can't just imagine that the protestors are expressing a cause that you believe in. You also have to imagine one that you might find repugnant because the rules have to be the same for both or we lose credibility.
192 |
193 | 0 (19m 40s):
194 | I know that back in 2016, which predates your presidency by quite a few years, there was a movement by Fordham students to start a chapter of Students for Justice in Palestine, which is a national organization, and that was at the center of many of the campus protests last year. And that was denied. I believe that there was a court case around that and the court upheld the Fordham decision, if I've got that correct. Yes. And I also know that according to the Foundation for Individual Rights and Expression (FIRE), which looks at free speech on campuses, Fordham ranks in the bottom 10 for colleges or universities across the country. So how do you as a president try to create a balance where you're not limiting free speech, but also not turning your campus into a hotbed where it can't accomplish the central purpose?
195 |
196 | 1 (20m 30s):
197 | First of all, those fire rankings, we don't really understand how they come to them. It is always tricky, right? At Fordham, we famously, and it got litigated suspended. A student who after a verbal argument with fellow students, went and bought an assault rifle and then posted that on social media. If he had shot up the campus, we would've been reamed If. We had not done anything, was so obvious a warning. But by suspending him, we got really attacked by some free speech purist groups saying, how dare you? It's just because you're against guns, right? So those are the kinds of lines we have to navigate every day. And what I find really a shame right now is those who push for more speech on campus have suddenly flip flopped on a lot of those issues.
198 |
199 | 1 (21m 15s):
200 | Right now they're yelling at us because we don't suppress speech more. This would've been a moment to really stand up and say, we find some of these protests to be anathema and disturbing, but this is what it looks like to put up with speech that you disagree with. But instead we're just being called hypocrites because we don't suppress it and they're being hypocrites in accusing us of hypocrisy. So it's very head spinning because what remains is the question of are you for this freedom or are you not?
201 |
202 | 0 (21m 43s):
203 | Do you have any evidence that discernment, as we discussed earlier, can help fight polarization or these kind of standoffs in the moment?
204 |
205 | 1 (21m 55s):
206 | I know from our faculty that every day in the classroom they try to not just teach knowledge, but the skills of discernment of what it means to have reflective practices where we're gonna really think about what we learned and stop and take time. This is something that as a law professor, as part of our ethos, I need for you to articulate the other side of the argument. Not because we're morally relativist, but because you can't know the strength of your belief until you're willing to think about the other side.
207 |
208 | 0 (22m 24s):
209 | And as a lawyer, your job is to argue the best case for whoever you end up representing, which I guess is a way to train in seeing the other side. Yeah,
210 |
211 | 1 (22m 33s):
212 | Right. I mean, legal education has a leg up in this because we've always done this work. and I think our faculty do a brilliant job of navigating how to take the temperature down when people disagree, how to say, okay, you are attacking the other student who you disagree with. You're attacking them personally. You're assuming they have bad intentions, you're not listening to them.
213 |
214 | 0 (22m 53s):
215 | Are you sure this is the job you want? I mean, it's a hard job.
216 |
217 | 1 (22m 57s):
218 | It is a very hard job, but I do love it because it matters. And sometimes things are hard because they're important.
219 |
220 | 0 (23m 4s):
221 | So one way universities are important, or at least supposed to be, is as an institution that can build social trust. Researchers who study this argue that universities and the military and even sports teams or places that do this well because in each case you've got a bunch of individuals from different backgrounds coming together with a common goal, or at least as part of a community. And I'm really curious how you think about, I mean this is an absurd and large question, but how you think about the rights and role of the individual in a community or society today with Fordham as the microcosm of that?
222 |
223 | 1 (23m 43s):
224 | Well, universities are one of the places of great hope. We do bring people together. And that's not just the obvious demographics, it's also rural and urban. It's different backgrounds economically, it's just different upbringings. And we've leaned into that from a Progressive point hard, but also that they find commonality that they have so much more in common when they least expect it. I think that our job is to express both and to treat diversity as we used to be allowed to do before the Supreme Court banned it, but about that quality of community and what it means. And so the court has continued to allow that in the military academies 'cause they understand exactly how valuable it is there.
225 |
226 | 1 (24m 24s):
227 | They've now forbidden us from overtly considering that in admissions. But regardless, we have the opportunity in our communities to really encourage, nudge, persuade students to know each other, to lean into that. For example, Greek life can be wonderful, but it can also divide. So we don't have that here. We try to find ways to get students to bond that aren't the obvious, finding people from exactly your tribe, but really reaching out across that. But it is,
228 |
229 | 0 (24m 56s):
230 | What's it for instance of that, of
231 |
232 | 1 (24m 58s):
233 | Kind of making student organizations really more about interest than about identity or self-selection and exclusivity? One of the most important places we teach is in the residence halls, right, of how we use peer mentoring because we have RAs who are just a little bit older than the students that they're mentoring and thus have credibility that we don't and of how they're on the front lines of navigating that profound loneliness that modern society has created. Social media sort of buries them in connection that is empty, especially after Covid when they were literally isolated. They have to learn the skills of how to really be with each other.
234 |
235 | 1 (25m 38s):
236 | And we're now having to teach that in ways that we didn't 10, 20 years ago.
237 |
238 | 0 (25m 46s):
239 | After the break, Tania Tetlow on university finances and pricing we're
240 |
241 | 1 (25m 52s):
242 | Stuck in a really stupid pricing model.
243 |
244 | 0 (25m 55s):
245 | I'm Steven Dubner. This is Freakonomics Radio. We'll be right back. Tell me a little bit about the finances of Fordham, maybe operating budget, and I'm just curious to know how things are looking.
246 |
247 | 1 (26m 16s):
248 | It's going well. We're not on the kind of crisis that most of higher ed is in right now financially, but it's still a squeeze. Every year we're hitting the ceiling of what American families can afford to pay in a world where we very much want to have normal and fair and generous pay increases for all of our employees. We're basically a service industry. So most of our budget goes to our people. And so those pressures are hard because we're pretty tuition dependent to pay for that. Our budget's about 700 million. Most of that is for the people we hire. It's very labor intensive work to teach and serve and then maintain a campus.
249 |
250 | 1 (26m 56s):
251 | What's
252 |
253 | 0 (26m 57s):
254 | Your endowment of Fordham?
255 |
256 | 1 (26m 58s):
257 | It is just about a billion.
258 |
259 | 0 (27m 1s):
260 | Okay, so that sounds like a lot of money to the average person except Harvard's is 50 billion.
261 |
262 | 1 (27m 5s):
263 | Exactly. It's hard fought for a school that mostly taught first generation students for so many decades, almost two centuries. It's sort of like a museum endowment that that interest on that is what supports us. And in our case very specifically supports primarily scholarships. And for us it's you know, maybe 5% of our budget. It's not like an Ivy League that's no longer dependent on tuition because they get so much revenue from their endowment.
264 |
265 | 0 (27m 33s):
266 | What would you do if you had a $50 billion endowment at Fordham? Well,
267 |
268 | 1 (27m 37s):
269 | We'd be able to fully meet need for all of our students, first and foremost, which would be a joy. And you know, we'd invest in everything that we wanna do and our ambitions, like
270 |
271 | 0 (27m 47s):
272 | What would that be?
273 |
274 | 1 (27m 48s):
275 | It would be research, but it really matters to keep that in balance with the quality of our teaching. So you know, research prowess, that also means those faculty are in the classroom every day teaching students. We are so strong in the humanities and law and business and to really be relevant and at the table, we need to connect with what's going on in AI with how to wake people up about climate change and find answers to the threats to democracy all over the world.
276 |
277 | 0 (28m 17s):
278 | College is just absurdly expensive. Fordham is in the $60,000 a year range tuition, is that right? Yeah. So talk about how you deal with financial aid, whether it's need-based and also merit aid. So
279 |
280 | 1 (28m 31s):
281 | We are need blind and admissions, but we are not one of the handful of schools wealthy enough to fully meet need. And so that is our biggest priority. The biggest part of our budget is making ourselves affordable. We're starting to try to shift more of our money from merit aid to financial need. The advantage of merit aid is you attract top students, you make them feel more special because of the scholarship. The disadvantage is of course some of those students who are the top students also have need, but some of them don't. And so you're spending money that you'd rather spend on those who can't afford to be there. But we're stuck in higher ed in a really stupid pricing model.
282 |
283 | 1 (29m 11s):
284 | The part that we know about is the price discrimination, where we charge the wealthy, what they can afford to pay and thus supplement those who can't. But the part that I think is hidden is that the market really drives sticker price being high because sticker price signals quality. The elite schools tend to have more of the barbell, the very wealthy, and those really struggling. Most of us have far more of the middle class who often frankly get squeezed out of the elite schools when schools like ours reduce our sticker price to what we tend to actually charge. On average, those schools have tended to fail because the consumer is suspicious that that school is not as good because it does not charge as much.
285 |
286 | 0 (29m 54s):
287 | So what is your actual average price that let's say an incoming freshman will pay this year with a sticker price of around 60 K. What will the actual average be?
288 |
289 | 1 (30m 2s):
290 | 30.
291 |
292 | 0 (30m 3s):
293 | Wow. Well, there have been accusations that colleges and universities have colluded in the past. Sometimes they've been busted for it. There are others who argue that they should collude more and I would think that this would be a case where collusion would be good to fight this very problem that you're talking about. Has there been any progress toward that?
294 |
295 | 1 (30m 20s):
296 | So there's a world where we would all say, okay, let's all lower our prices to what we really charge because that sticker price is so disheartening and so scary to those without the sophistication to understand it's not real, but we're not allowed to do that. We can't collude on price. So this is where the market is. You know, it sounds silly except that when you go to buy, you know a jacket and there's one jacket that's a hundred dollars, that's 50% off and one jacket that's $50. Even if they're the same jacket, you're gonna go for the first one, right? This is human psychology. This is how we all behave. And if you get the 50% off because you are special because you earned the scholarship, it makes you feel even better about it.
297 |
298 | 1 (31m 1s):
299 | And so it is very hard for us to break out of this system.
300 |
301 | 0 (31m 5s):
302 | Let's talk a little bit about growing the size of student populations. Historically, the college population in the US rose and rose and rose and rose and rose. But then it hit what looked to be a bit of a ceiling and it's come back down a little bit. There are some schools, however, who just don't like to grow. There's research by these two economists, Peter Blair and Kent Smetters that finds that elite colleges have mostly capped their enrollment numbers since the 1980s. Their argument is that those caps have to do with mostly universities wanting to maintain their prestige, protect their reputations, and they argue in a kind of quiet voice that this is a shame. The idea being that if these universities are so good and so elite at educating people, they should educate more people.
303 |
304 | 0 (31m 48s):
305 | Just like any firm that successful wants more customers, not the same number. So let's just start with that. Your thoughts on the notion that elite schools keep their populations about the same. Why they do that and why you're not thinking like that?
306 |
307 | 1 (32m 5s):
308 | When you look at when elite schools stopped growing, it was exactly the same time US News introduced the rankings and those rankings until very recently encouraged a major category of selectivity. It created these profound incentives for all of us. But you know, the elites who battle with each other for top dog to reject as many students as possible, that's how you were measured. The elites get status and prestige and very specifically rankings by virtue of how low that acceptance rate is. My favorite satirical headline once was, Stanford achieved 0% admission rate. It was a joke, but it was something very real.
309 |
310 | 0 (32m 44s):
311 | Just barely. Yep.
312 |
313 | 1 (32m 45s):
314 | Yes, exactly. That's where we've landed. The idea that the solution to this is to get a few thousand more students into those elite schools, I think begs the question of why they are the answer. Because what the rankings also did is it took a higher ed system of glorious complexity and variety, about 4,000 nonprofit schools, and it put us in line order when really we're in clumps of ties. And it was never true that you could only get a good education at a handful of schools. I think to buy into that, to say that that should be the focus really ignores the fact that there are probably a hundred universities in this country that provide the same kind of academic excellence, and we need to remind ourselves of that because the more we just play into the rankings game of chasing status, the more alumni get status from giving to those universities.
315 |
316 | 1 (33m 35s):
317 | We've really ratcheted up the cleaving between the haves and have nots and that gets worse and worse.
318 |
319 | 0 (33m 41s):
320 | So Fordham, I believe, has increased enrollment by about 10% over the past 10 years. Does that sound about right?
321 |
322 | 1 (33m 48s):
323 | I think so, yeah.
324 |
325 | 0 (33m 49s):
326 | So talk to me about that. When you're trying to grow, especially in a city like New York, what are the big challenges? Are there enough good professors? What does it mean for facilities? Are there enough students that you want and so on?
327 |
328 | 1 (34m 1s):
329 | The biggest challenge is students because right now we have a demographic downturn in the number of 18 year olds generally, and that will peak 18 years after the 2008 recession started. People dramatically had fewer children, but we also have a drop in the percentage of Americans going to college, and that has been rather dramatic. It's a mix of covid and then most recently of the FAFSA form debacle. So you may have seen in the news, but the Department of Ed stumbled for all sorts of reasons to redo the FAFSA form.
330 |
331 | 0 (34m 40s):
332 | In case you haven't seen the FAFSA debacle in the news, FAFSA stands for free application for federal Student aid. It is administered by the federal government. This past admission season, there were technical problems that meant FAFSA came online three months late and then sent inaccurate financial aid offers to around a million applicants.
333 |
334 | 1 (35m 3s):
335 | What it means is that for most schools, they're looking at a decline in their populations and in community colleges, especially a quite dramatic one. So for any school other than the very, very elites to grow is not possible. Right now what I worry about is that for most of higher ed, they're just not gonna be able to make it anymore and the country will suffer so much from that. We understand still as a society that K through 12 is a right, is not seen as some kind of calming experiment, but somehow higher ed is not seen as a right anymore. After World War II was the last time the economy really shuddered to a halt because we weren't building weapons anymore and Congress made the brilliant decision to invest in all those millions of veterans coming home from the war who would not have jobs to say, we will pay for your education.
336 |
337 | 1 (35m 53s):
338 | And it fueled so many Nobel prizes and Pulitzers and the rise of the middle class in the fifties and global economic dominance in the world. It was such a smart thing to do. And yet now we're doing the opposite. The Pell Grants, which when they were unveiled in the seventies, were enough to cover tuition. Room and board for most schools now are a pittance and states are disinvesting from their public institutions. China's not doing that.
339 |
340 | 0 (36m 20s):
341 | The public's perception of academia has fallen a lot. It began on the right, but now the left is catching up. There are many perceptions out there, one of which is that college campuses can be hostile to young men. Fordham is now majority female. I was surprised to see there's another perception that colleges are hostile to anyone who leans even a little bit conservative in any dimension. Students and faculty, there's the perception that it's too expensive, it's too exclusive, it's not useful enough in the real world. So how are you reckoning with that general perception of decline?
342 |
343 | 1 (36m 56s):
344 | Well, it's hard because there's great political benefit to tearing down trust in institutions. It's easy to do, it resonates with people who are understandably cynical. And once you've done it, it's done. And it's very hard to rebuild. You know, all of higher ed has become majority female and that's a much deeper topic to grapple with than what I worry about as well.
345 |
346 | 0 (37m 17s):
347 | You worry because there are all those men who are not getting involved in that kind of system.
348 |
349 | 1 (37m 22s):
350 | Exactly. I think men are, are opting out of the opportunities that they need in an increasingly knowledge based economy and we will all suffer as a result of that. And so I worry about that. So the return on investment is sort of laughable because when you look at the data, it is so clear the financial return on investment, right, which just proves that you can make things up and they stick. and I would say that part of what I find really offensive are politicians saying that it's not worth it to go to college. None of whom say that to their own children,
351 |
352 | 0 (37m 53s):
353 | None of whom didn't go to college either. Exactly. And law school on top of that
354 |
355 | 1 (37m 58s):
356 | And graduate school. So you know, we've become a political football of late in ways that make us really vulnerable. But what's so sad about that is, you know, the countries against whom the US competes, none of them are disinvesting from education right now. We are shooting ourselves in the foot in profound ways. When we decide for political points, we will take away one of the great higher education systems in the world that's been the envy of the world for so long. We're going to keep pulling back from it, pulling funds, pulling credibility and trust, all for scoring political points in a temporary way.
357 |
358 | 0 (38m 37s):
359 | If we're going to talk about the attacks on institutions generally, let's not ignore the one that you're associated with, which is the Catholic church. That's a case where it mostly revolved around the priest sex scandals that have been revealed and the coverups really of the past 30 or 40 years. I haven't seen numbers lately on the perception of the Catholic church as an institution, but I'm guessing it's fallen very similarly to the way the reputation of colleges and universities have.
360 |
361 | 1 (39m 5s):
362 | The trust in religious institutions generally plummeted a while back. And then of course trust in the Catholic church given the scandals deservedly plummeted. What I know from having spent much of my career fighting against sexual abuse is that that denial, those cover ups, the level of abuse still exists in all other institutions that have trusting relationships over children. And my worry is we're not learning the painful lessons the church learned.
363 |
364 | 0 (39m 35s):
365 | What other institutions do you mean?
366 |
367 | 1 (39m 37s):
368 | We're seeing scandals emerge from Boy Scouts, from other religious institutions, but also the vast majority of child sex abuse happens within families. What I used to do every day was to go into court and beg judges to care about that. And they found it so depressing that they just decided it was made up most of the time. You know, that's a whole other episode. But the reality is again, these problems weren't unique to the church. The church really messed it up and my hope is that everyone else will stop being in denial about where we still have a crisis.
369 |
370 | 0 (40m 11s):
371 | Do you have much a relationship with the cardinal of the Archdiocese of New York?
372 |
373 | 1 (40m 15s):
374 | Yes. Cardinal Dolan and I get together at least once a year, if not more often. It's not that Catholic universities report to the church, nor do we get funding from them. But we exist in relationship and I'm lucky in that it's a very friendly and cordial relationship.
375 |
376 | 0 (40m 34s):
377 | Do you think it makes sense that academic institutions like Fordham have such big tax advantages in a city like New York? You know, if you look at the biggest landowners in New York, two of them are universities, Columbia and NYU, and then the Catholic church is another big one and they're all tax exempt and you at Fordham, kind of at the sweet spot of those two. Does that make sense to you in a 21st century tax environment?
378 |
379 | 1 (41m 4s):
380 | Here's why it does. When you are taxing a for-profit entity, you are creating a business expense. You're taking off a profit margin to fund city institutions. The idea in general is that if you are a nonprofit civic organization doing good for the world, we'd rather you spend your money doing that. We are huge economic engines for cities. Senator Moynihan had a great quote that if you want a great city, build a university and wait 200 years. So if you were to design what will make an economy flourish, it would not just be the infrastructure taxes pay for, it would be great universities,
381 |
382 | 0 (41m 44s):
383 | If, We, were looking ahead to Fordham, let's say 20 or maybe even 50 years from now. In what significant ways would you like it to be very different than it is today? You can keep all the good stuff, but what would you like to change?
384 |
385 | 1 (41m 58s):
386 | I think when I look ahead deep down that what I would like us to do is to not chase status. It's just to do good for the world. And that has become ever more crucial because the problems of the world just seem so urgent and full of despair. And so that we look back on our careers here at Fordham and know that we mattered and not about silliness, that doesn't matter, but we have hundreds of thousands of living alumni and they matter every day in ways we'll never see. And did we have a profound impact on the kind of ethics and empathy and work that they do every day?
387 |
388 | 0 (42m 39s):
389 | I'd like to thank Tania Tetlow, president of Fordham University for a conversation that was much meatier than many conversations I hear these days with people in positions of authority. So I appreciate her forthrightness and her courage in saying how she really sees things, or at least what I think is how she really sees things. Maybe I've been the target of a massive con job, but I don't think so. One reason I wanted you to hear this conversation today is because next week we are going to start playing for you an updated version of one of the most important series we've ever made about the economics of higher education, the supply and the demand, the controversies and the hypocrisies, the answers and the questions.
390 |
391 | 6 (43m 22s):
392 | Why are more women going to college than men?
393 |
394 | 7 (43m 25s):
395 | What happens when black and Hispanic students lose admissions advantages?
396 |
397 | 8 (43m 29s):
398 | How does the marketplace for higher education operate?
399 |
400 | 0 (43m 34s):
401 | Hi, tell you something. It's
402 |
403 | 1 (43m 35s):
404 | A darn good question.
405 |
406 | 0 (43m 37s):
407 | That's next time on the show. Until then, take care of yourself and if you can someone else too. Free Economics Radio is produced by Stitcher and BU Radio. You can find our entire archive on any podcast app [email protected], where we publish transcripts and show notes. This episode was produced by Zach Lapinski, with help from Dalvin Aji. Our staff also includes Alina Coleman, Augusta Chapman, Eleanor Osborne, Elsa Hernandez, Gabriel Roth, Greg Rippin, Jasmine Klinger, Jeremy Johnston, John nars, Julie Canford, lyric bdi, Morgan Levy, Neil Carruth, Rebecca Lee Douglas, Sarah Lilly, and Teo Jacobs. Our theme song is Mr. Fortune by the Hitchhikers. Our composer is Luis Gura.
408 |
409 | 0 (44m 19s):
410 | As always, thanks for listening.
411 |
412 | 1 (44m 25s):
413 | We have always, sorry, trying to think of the word,
414 |
415 | 4 (44m 35s):
416 | The Freakonomics Radio Network, the hidden side of everything.
417 |
418 | 10 (44m 42s):
419 | Stitcher.
```
--------------------------------------------------------------------------------
/graphiti_core/graphiti.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Copyright 2024, Zep Software, Inc.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 |
17 | import logging
18 | from datetime import datetime
19 | from time import time
20 |
21 | from dotenv import load_dotenv
22 | from pydantic import BaseModel
23 | from typing_extensions import LiteralString
24 |
25 | from graphiti_core.cross_encoder.client import CrossEncoderClient
26 | from graphiti_core.cross_encoder.openai_reranker_client import OpenAIRerankerClient
27 | from graphiti_core.decorators import handle_multiple_group_ids
28 | from graphiti_core.driver.driver import GraphDriver
29 | from graphiti_core.driver.neo4j_driver import Neo4jDriver
30 | from graphiti_core.edges import (
31 | CommunityEdge,
32 | Edge,
33 | EntityEdge,
34 | EpisodicEdge,
35 | create_entity_edge_embeddings,
36 | )
37 | from graphiti_core.embedder import EmbedderClient, OpenAIEmbedder
38 | from graphiti_core.errors import NodeNotFoundError
39 | from graphiti_core.graphiti_types import GraphitiClients
40 | from graphiti_core.helpers import (
41 | get_default_group_id,
42 | semaphore_gather,
43 | validate_excluded_entity_types,
44 | validate_group_id,
45 | )
46 | from graphiti_core.llm_client import LLMClient, OpenAIClient
47 | from graphiti_core.nodes import (
48 | CommunityNode,
49 | EntityNode,
50 | EpisodeType,
51 | EpisodicNode,
52 | Node,
53 | create_entity_node_embeddings,
54 | )
55 | from graphiti_core.search.search import SearchConfig, search
56 | from graphiti_core.search.search_config import DEFAULT_SEARCH_LIMIT, SearchResults
57 | from graphiti_core.search.search_config_recipes import (
58 | COMBINED_HYBRID_SEARCH_CROSS_ENCODER,
59 | EDGE_HYBRID_SEARCH_NODE_DISTANCE,
60 | EDGE_HYBRID_SEARCH_RRF,
61 | )
62 | from graphiti_core.search.search_filters import SearchFilters
63 | from graphiti_core.search.search_utils import (
64 | RELEVANT_SCHEMA_LIMIT,
65 | get_mentioned_nodes,
66 | )
67 | from graphiti_core.telemetry import capture_event
68 | from graphiti_core.tracer import Tracer, create_tracer
69 | from graphiti_core.utils.bulk_utils import (
70 | RawEpisode,
71 | add_nodes_and_edges_bulk,
72 | dedupe_edges_bulk,
73 | dedupe_nodes_bulk,
74 | extract_nodes_and_edges_bulk,
75 | resolve_edge_pointers,
76 | retrieve_previous_episodes_bulk,
77 | )
78 | from graphiti_core.utils.datetime_utils import utc_now
79 | from graphiti_core.utils.maintenance.community_operations import (
80 | build_communities,
81 | remove_communities,
82 | update_community,
83 | )
84 | from graphiti_core.utils.maintenance.edge_operations import (
85 | build_episodic_edges,
86 | extract_edges,
87 | resolve_extracted_edge,
88 | resolve_extracted_edges,
89 | )
90 | from graphiti_core.utils.maintenance.graph_data_operations import (
91 | EPISODE_WINDOW_LEN,
92 | retrieve_episodes,
93 | )
94 | from graphiti_core.utils.maintenance.node_operations import (
95 | extract_attributes_from_nodes,
96 | extract_nodes,
97 | resolve_extracted_nodes,
98 | )
99 | from graphiti_core.utils.ontology_utils.entity_types_utils import validate_entity_types
100 |
101 | logger = logging.getLogger(__name__)
102 |
103 | load_dotenv()
104 |
105 |
class AddEpisodeResults(BaseModel):
    """Container for the graph elements produced by a single episode ingestion.

    Returned so callers can inspect everything that was created or updated.
    """

    # The episodic node representing the ingested episode itself.
    episode: EpisodicNode
    # Edges connecting the episode to the entity nodes it mentions.
    episodic_edges: list[EpisodicEdge]
    # Entity nodes extracted from (or resolved for) the episode.
    nodes: list[EntityNode]
    # Entity-to-entity relationship edges extracted from the episode.
    edges: list[EntityEdge]
    # Community nodes included in this result (may be empty).
    communities: list[CommunityNode]
    # Edges linking entity nodes to community nodes (may be empty).
    community_edges: list[CommunityEdge]
113 |
114 |
class AddBulkEpisodeResults(BaseModel):
    """Container for the graph elements produced by a bulk episode ingestion.

    Mirrors ``AddEpisodeResults`` but aggregates results across multiple
    episodes.
    """

    # The episodic nodes representing all ingested episodes.
    episodes: list[EpisodicNode]
    # Edges connecting the episodes to the entity nodes they mention.
    episodic_edges: list[EpisodicEdge]
    # Entity nodes extracted from (or resolved for) the episodes.
    nodes: list[EntityNode]
    # Entity-to-entity relationship edges extracted from the episodes.
    edges: list[EntityEdge]
    # Community nodes included in this result (may be empty).
    communities: list[CommunityNode]
    # Edges linking entity nodes to community nodes (may be empty).
    community_edges: list[CommunityEdge]
122 |
123 |
class AddTripletResults(BaseModel):
    """Container for the nodes and edges affected by an add-triplet operation."""

    # Entity nodes returned by the operation.
    nodes: list[EntityNode]
    # Entity edges returned by the operation.
    edges: list[EntityEdge]
127 |
128 |
129 | class Graphiti:
130 | def __init__(
131 | self,
132 | uri: str | None = None,
133 | user: str | None = None,
134 | password: str | None = None,
135 | llm_client: LLMClient | None = None,
136 | embedder: EmbedderClient | None = None,
137 | cross_encoder: CrossEncoderClient | None = None,
138 | store_raw_episode_content: bool = True,
139 | graph_driver: GraphDriver | None = None,
140 | max_coroutines: int | None = None,
141 | tracer: Tracer | None = None,
142 | trace_span_prefix: str = 'graphiti',
143 | ):
144 | """
145 | Initialize a Graphiti instance.
146 |
147 | This constructor sets up a connection to a graph database and initializes
148 | the LLM client for natural language processing tasks.
149 |
150 | Parameters
151 | ----------
152 | uri : str
153 | The URI of the Neo4j database.
154 | user : str
155 | The username for authenticating with the Neo4j database.
156 | password : str
157 | The password for authenticating with the Neo4j database.
158 | llm_client : LLMClient | None, optional
159 | An instance of LLMClient for natural language processing tasks.
160 | If not provided, a default OpenAIClient will be initialized.
161 | embedder : EmbedderClient | None, optional
162 | An instance of EmbedderClient for embedding tasks.
163 | If not provided, a default OpenAIEmbedder will be initialized.
164 | cross_encoder : CrossEncoderClient | None, optional
165 | An instance of CrossEncoderClient for reranking tasks.
166 | If not provided, a default OpenAIRerankerClient will be initialized.
167 | store_raw_episode_content : bool, optional
168 | Whether to store the raw content of episodes. Defaults to True.
169 | graph_driver : GraphDriver | None, optional
170 | An instance of GraphDriver for database operations.
171 | If not provided, a default Neo4jDriver will be initialized.
172 | max_coroutines : int | None, optional
173 | The maximum number of concurrent operations allowed. Overrides SEMAPHORE_LIMIT set in the environment.
174 | If not set, the Graphiti default is used.
175 | tracer : Tracer | None, optional
176 | An OpenTelemetry tracer instance for distributed tracing. If not provided, tracing is disabled (no-op).
177 | trace_span_prefix : str, optional
178 | Prefix to prepend to all span names. Defaults to 'graphiti'.
179 |
180 | Returns
181 | -------
182 | None
183 |
184 | Notes
185 | -----
186 | This method establishes a connection to a graph database (Neo4j by default) using the provided
187 | credentials. It also sets up the LLM client, either using the provided client
188 | or by creating a default OpenAIClient.
189 |
190 | The default database name is defined during the driver’s construction. If a different database name
191 | is required, it should be specified in the URI or set separately after
192 | initialization.
193 |
194 | The OpenAI API key is expected to be set in the environment variables.
195 | Make sure to set the OPENAI_API_KEY environment variable before initializing
196 | Graphiti if you're using the default OpenAIClient.
197 | """
198 |
199 | if graph_driver:
200 | self.driver = graph_driver
201 | else:
202 | if uri is None:
203 | raise ValueError('uri must be provided when graph_driver is None')
204 | self.driver = Neo4jDriver(uri, user, password)
205 |
206 | self.store_raw_episode_content = store_raw_episode_content
207 | self.max_coroutines = max_coroutines
208 | if llm_client:
209 | self.llm_client = llm_client
210 | else:
211 | self.llm_client = OpenAIClient()
212 | if embedder:
213 | self.embedder = embedder
214 | else:
215 | self.embedder = OpenAIEmbedder()
216 | if cross_encoder:
217 | self.cross_encoder = cross_encoder
218 | else:
219 | self.cross_encoder = OpenAIRerankerClient()
220 |
221 | # Initialize tracer
222 | self.tracer = create_tracer(tracer, trace_span_prefix)
223 |
224 | # Set tracer on clients
225 | self.llm_client.set_tracer(self.tracer)
226 |
227 | self.clients = GraphitiClients(
228 | driver=self.driver,
229 | llm_client=self.llm_client,
230 | embedder=self.embedder,
231 | cross_encoder=self.cross_encoder,
232 | tracer=self.tracer,
233 | )
234 |
235 | # Capture telemetry event
236 | self._capture_initialization_telemetry()
237 |
238 | def _capture_initialization_telemetry(self):
239 | """Capture telemetry event for Graphiti initialization."""
240 | try:
241 | # Detect provider types from class names
242 | llm_provider = self._get_provider_type(self.llm_client)
243 | embedder_provider = self._get_provider_type(self.embedder)
244 | reranker_provider = self._get_provider_type(self.cross_encoder)
245 | database_provider = self._get_provider_type(self.driver)
246 |
247 | properties = {
248 | 'llm_provider': llm_provider,
249 | 'embedder_provider': embedder_provider,
250 | 'reranker_provider': reranker_provider,
251 | 'database_provider': database_provider,
252 | }
253 |
254 | capture_event('graphiti_initialized', properties)
255 | except Exception:
256 | # Silently handle telemetry errors
257 | pass
258 |
259 | def _get_provider_type(self, client) -> str:
260 | """Get provider type from client class name."""
261 | if client is None:
262 | return 'none'
263 |
264 | class_name = client.__class__.__name__.lower()
265 |
266 | # LLM providers
267 | if 'openai' in class_name:
268 | return 'openai'
269 | elif 'azure' in class_name:
270 | return 'azure'
271 | elif 'anthropic' in class_name:
272 | return 'anthropic'
273 | elif 'crossencoder' in class_name:
274 | return 'crossencoder'
275 | elif 'gemini' in class_name:
276 | return 'gemini'
277 | elif 'groq' in class_name:
278 | return 'groq'
279 | # Database providers
280 | elif 'neo4j' in class_name:
281 | return 'neo4j'
282 | elif 'falkor' in class_name:
283 | return 'falkordb'
284 | # Embedder providers
285 | elif 'voyage' in class_name:
286 | return 'voyage'
287 | else:
288 | return 'unknown'
289 |
290 | async def close(self):
291 | """
292 | Close the connection to the Neo4j database.
293 |
294 | This method safely closes the driver connection to the Neo4j database.
295 | It should be called when the Graphiti instance is no longer needed or
296 | when the application is shutting down.
297 |
298 | Parameters
299 | ----------
300 | self
301 |
302 | Returns
303 | -------
304 | None
305 |
306 | Notes
307 | -----
308 | It's important to close the driver connection to release system resources
309 | and ensure that all pending transactions are completed or rolled back.
310 | This method should be called as part of a cleanup process, potentially
311 | in a context manager or a shutdown hook.
312 |
313 | Example:
314 | graphiti = Graphiti(uri, user, password)
315 | try:
316 | # Use graphiti...
317 | finally:
318 | graphiti.close()
319 | """
320 | await self.driver.close()
321 |
322 | async def build_indices_and_constraints(self, delete_existing: bool = False):
323 | """
324 | Build indices and constraints in the Neo4j database.
325 |
326 | This method sets up the necessary indices and constraints in the Neo4j database
327 | to optimize query performance and ensure data integrity for the knowledge graph.
328 |
329 | Parameters
330 | ----------
331 | self
332 | delete_existing : bool, optional
333 | Whether to clear existing indices before creating new ones.
334 |
335 |
336 | Returns
337 | -------
338 | None
339 |
340 | Notes
341 | -----
342 | This method should typically be called once during the initial setup of the
343 | knowledge graph or when updating the database schema. It uses the
344 | driver's `build_indices_and_constraints` method to perform
345 | the actual database operations.
346 |
347 | The specific indices and constraints created depend on the implementation
348 | of the driver's `build_indices_and_constraints` method. Refer to the specific
349 | driver documentation for details on the exact database schema modifications.
350 |
351 | Caution: Running this method on a large existing database may take some time
352 | and could impact database performance during execution.
353 | """
354 | await self.driver.build_indices_and_constraints(delete_existing)
355 |
356 | async def _extract_and_resolve_nodes(
357 | self,
358 | episode: EpisodicNode,
359 | previous_episodes: list[EpisodicNode],
360 | entity_types: dict[str, type[BaseModel]] | None,
361 | excluded_entity_types: list[str] | None,
362 | ) -> tuple[list[EntityNode], dict[str, str], list[tuple[EntityNode, EntityNode]]]:
363 | """Extract nodes from episode and resolve against existing graph."""
364 | extracted_nodes = await extract_nodes(
365 | self.clients, episode, previous_episodes, entity_types, excluded_entity_types
366 | )
367 |
368 | nodes, uuid_map, duplicates = await resolve_extracted_nodes(
369 | self.clients,
370 | extracted_nodes,
371 | episode,
372 | previous_episodes,
373 | entity_types,
374 | )
375 |
376 | return nodes, uuid_map, duplicates
377 |
378 | async def _extract_and_resolve_edges(
379 | self,
380 | episode: EpisodicNode,
381 | extracted_nodes: list[EntityNode],
382 | previous_episodes: list[EpisodicNode],
383 | edge_type_map: dict[tuple[str, str], list[str]],
384 | group_id: str,
385 | edge_types: dict[str, type[BaseModel]] | None,
386 | nodes: list[EntityNode],
387 | uuid_map: dict[str, str],
388 | custom_extraction_instructions: str | None = None,
389 | ) -> tuple[list[EntityEdge], list[EntityEdge]]:
390 | """Extract edges from episode and resolve against existing graph."""
391 | extracted_edges = await extract_edges(
392 | self.clients,
393 | episode,
394 | extracted_nodes,
395 | previous_episodes,
396 | edge_type_map,
397 | group_id,
398 | edge_types,
399 | custom_extraction_instructions,
400 | )
401 |
402 | edges = resolve_edge_pointers(extracted_edges, uuid_map)
403 |
404 | resolved_edges, invalidated_edges = await resolve_extracted_edges(
405 | self.clients,
406 | edges,
407 | episode,
408 | nodes,
409 | edge_types or {},
410 | edge_type_map,
411 | )
412 |
413 | return resolved_edges, invalidated_edges
414 |
415 | async def _process_episode_data(
416 | self,
417 | episode: EpisodicNode,
418 | nodes: list[EntityNode],
419 | entity_edges: list[EntityEdge],
420 | now: datetime,
421 | ) -> tuple[list[EpisodicEdge], EpisodicNode]:
422 | """Process and save episode data to the graph."""
423 | episodic_edges = build_episodic_edges(nodes, episode.uuid, now)
424 | episode.entity_edges = [edge.uuid for edge in entity_edges]
425 |
426 | if not self.store_raw_episode_content:
427 | episode.content = ''
428 |
429 | await add_nodes_and_edges_bulk(
430 | self.driver,
431 | [episode],
432 | episodic_edges,
433 | nodes,
434 | entity_edges,
435 | self.embedder,
436 | )
437 |
438 | return episodic_edges, episode
439 |
440 | async def _extract_and_dedupe_nodes_bulk(
441 | self,
442 | episode_context: list[tuple[EpisodicNode, list[EpisodicNode]]],
443 | edge_type_map: dict[tuple[str, str], list[str]],
444 | edge_types: dict[str, type[BaseModel]] | None,
445 | entity_types: dict[str, type[BaseModel]] | None,
446 | excluded_entity_types: list[str] | None,
447 | custom_extraction_instructions: str | None = None,
448 | ) -> tuple[
449 | dict[str, list[EntityNode]],
450 | dict[str, str],
451 | list[list[EntityEdge]],
452 | ]:
453 | """Extract nodes and edges from all episodes and deduplicate."""
454 | # Extract all nodes and edges for each episode
455 | extracted_nodes_bulk, extracted_edges_bulk = await extract_nodes_and_edges_bulk(
456 | self.clients,
457 | episode_context,
458 | edge_type_map=edge_type_map,
459 | edge_types=edge_types,
460 | entity_types=entity_types,
461 | excluded_entity_types=excluded_entity_types,
462 | custom_extraction_instructions=custom_extraction_instructions,
463 | )
464 |
465 | # Dedupe extracted nodes in memory
466 | nodes_by_episode, uuid_map = await dedupe_nodes_bulk(
467 | self.clients, extracted_nodes_bulk, episode_context, entity_types
468 | )
469 |
470 | return nodes_by_episode, uuid_map, extracted_edges_bulk
471 |
    async def _resolve_nodes_and_edges_bulk(
        self,
        nodes_by_episode: dict[str, list[EntityNode]],
        edges_by_episode: dict[str, list[EntityEdge]],
        episode_context: list[tuple[EpisodicNode, list[EpisodicNode]]],
        entity_types: dict[str, type[BaseModel]] | None,
        edge_types: dict[str, type[BaseModel]] | None,
        edge_type_map: dict[tuple[str, str], list[str]],
        episodes: list[EpisodicNode],
    ) -> tuple[list[EntityNode], list[EntityEdge], list[EntityEdge], dict[str, str]]:
        """Resolve nodes and edges against the existing graph.

        Pipeline: dedupe nodes across episodes -> resolve nodes per episode ->
        remap node references through the resulting uuid_map -> extract node
        attributes -> dedupe/remap edges -> resolve edges per episode.

        Returns
        -------
        tuple
            (hydrated nodes, resolved edges, invalidated edges, uuid_map
            mapping provisional node uuid -> canonical node uuid).
        """
        # Index every node by uuid so later remapping steps can look them up.
        nodes_by_uuid: dict[str, EntityNode] = {
            node.uuid: node for nodes in nodes_by_episode.values() for node in nodes
        }

        # Get unique nodes per episode: each node uuid is kept only with the
        # first episode (in episode_context order) that mentions it, so no
        # node is resolved more than once.
        nodes_by_episode_unique: dict[str, list[EntityNode]] = {}
        nodes_uuid_set: set[str] = set()
        for episode, _ in episode_context:
            nodes_by_episode_unique[episode.uuid] = []
            nodes = [nodes_by_uuid[node.uuid] for node in nodes_by_episode[episode.uuid]]
            for node in nodes:
                if node.uuid not in nodes_uuid_set:
                    nodes_by_episode_unique[episode.uuid].append(node)
                    nodes_uuid_set.add(node.uuid)

        # Resolve nodes (one concurrent resolution task per episode)
        node_results = await semaphore_gather(
            *[
                resolve_extracted_nodes(
                    self.clients,
                    nodes_by_episode_unique[episode.uuid],
                    episode,
                    previous_episodes,
                    entity_types,
                )
                for episode, previous_episodes in episode_context
            ]
        )

        # Merge per-episode results into one node list and one uuid remapping.
        resolved_nodes: list[EntityNode] = []
        uuid_map: dict[str, str] = {}
        for result in node_results:
            resolved_nodes.extend(result[0])
            uuid_map.update(result[1])

        # Update nodes_by_uuid with resolved nodes
        for resolved_node in resolved_nodes:
            nodes_by_uuid[resolved_node.uuid] = resolved_node

        # Update nodes_by_episode_unique with resolved pointers: replace each
        # provisional node with its canonical resolved counterpart.
        for episode_uuid, nodes in nodes_by_episode_unique.items():
            updated_nodes: list[EntityNode] = []
            for node in nodes:
                updated_node_uuid = uuid_map.get(node.uuid, node.uuid)
                updated_node = nodes_by_uuid[updated_node_uuid]
                updated_nodes.append(updated_node)
            nodes_by_episode_unique[episode_uuid] = updated_nodes

        # Extract attributes for resolved nodes
        hydrated_nodes_results: list[list[EntityNode]] = await semaphore_gather(
            *[
                extract_attributes_from_nodes(
                    self.clients,
                    nodes_by_episode_unique[episode.uuid],
                    episode,
                    previous_episodes,
                    entity_types,
                )
                for episode, previous_episodes in episode_context
            ]
        )

        final_hydrated_nodes = [node for nodes in hydrated_nodes_results for node in nodes]

        # Resolve edges with updated pointers; dedupe by edge uuid the same way
        # nodes were deduped above.
        edges_by_episode_unique: dict[str, list[EntityEdge]] = {}
        edges_uuid_set: set[str] = set()
        for episode_uuid, edges in edges_by_episode.items():
            edges_with_updated_pointers = resolve_edge_pointers(edges, uuid_map)
            edges_by_episode_unique[episode_uuid] = []

            for edge in edges_with_updated_pointers:
                if edge.uuid not in edges_uuid_set:
                    edges_by_episode_unique[episode_uuid].append(edge)
                    edges_uuid_set.add(edge.uuid)

        # NOTE(review): this indexes edges_by_episode_unique with every episode's
        # uuid; it assumes edges_by_episode has an entry for each episode in
        # `episodes` — confirm, otherwise a KeyError is possible here.
        edge_results = await semaphore_gather(
            *[
                resolve_extracted_edges(
                    self.clients,
                    edges_by_episode_unique[episode.uuid],
                    episode,
                    final_hydrated_nodes,
                    edge_types or {},
                    edge_type_map,
                )
                for episode in episodes
            ]
        )

        resolved_edges: list[EntityEdge] = []
        invalidated_edges: list[EntityEdge] = []
        for result in edge_results:
            resolved_edges.extend(result[0])
            invalidated_edges.extend(result[1])

        return final_hydrated_nodes, resolved_edges, invalidated_edges, uuid_map
580 |
581 | @handle_multiple_group_ids
582 | async def retrieve_episodes(
583 | self,
584 | reference_time: datetime,
585 | last_n: int = EPISODE_WINDOW_LEN,
586 | group_ids: list[str] | None = None,
587 | source: EpisodeType | None = None,
588 | driver: GraphDriver | None = None,
589 | ) -> list[EpisodicNode]:
590 | """
591 | Retrieve the last n episodic nodes from the graph.
592 |
593 | This method fetches a specified number of the most recent episodic nodes
594 | from the graph, relative to the given reference time.
595 |
596 | Parameters
597 | ----------
598 | reference_time : datetime
599 | The reference time to retrieve episodes before.
600 | last_n : int, optional
601 | The number of episodes to retrieve. Defaults to EPISODE_WINDOW_LEN.
602 | group_ids : list[str | None], optional
603 | The group ids to return data from.
604 |
605 | Returns
606 | -------
607 | list[EpisodicNode]
608 | A list of the most recent EpisodicNode objects.
609 |
610 | Notes
611 | -----
612 | The actual retrieval is performed by the `retrieve_episodes` function
613 | from the `graphiti_core.utils` module.
614 | """
615 | if driver is None:
616 | driver = self.clients.driver
617 |
618 | return await retrieve_episodes(driver, reference_time, last_n, group_ids, source)
619 |
    async def add_episode(
        self,
        name: str,
        episode_body: str,
        source_description: str,
        reference_time: datetime,
        source: EpisodeType = EpisodeType.message,
        group_id: str | None = None,
        uuid: str | None = None,
        update_communities: bool = False,
        entity_types: dict[str, type[BaseModel]] | None = None,
        excluded_entity_types: list[str] | None = None,
        previous_episode_uuids: list[str] | None = None,
        edge_types: dict[str, type[BaseModel]] | None = None,
        edge_type_map: dict[tuple[str, str], list[str]] | None = None,
        custom_extraction_instructions: str | None = None,
    ) -> AddEpisodeResults:
        """
        Process an episode and update the graph.

        This method extracts information from the episode, creates nodes and edges,
        and updates the graph database accordingly.

        Parameters
        ----------
        name : str
            The name of the episode.
        episode_body : str
            The content of the episode.
        source_description : str
            A description of the episode's source.
        reference_time : datetime
            The reference time for the episode.
        source : EpisodeType, optional
            The type of the episode. Defaults to EpisodeType.message.
        group_id : str | None
            An id for the graph partition the episode is a part of.
        uuid : str | None
            Optional uuid of the episode.
        update_communities : bool
            Optional. Whether to update communities with new node information
        entity_types : dict[str, BaseModel] | None
            Optional. Dictionary mapping entity type names to their Pydantic model definitions.
        excluded_entity_types : list[str] | None
            Optional. List of entity type names to exclude from the graph. Entities classified
            into these types will not be added to the graph. Can include 'Entity' to exclude
            the default entity type.
        previous_episode_uuids : list[str] | None
            Optional. list of episode uuids to use as the previous episodes. If this is not provided,
            the most recent episodes by created_at date will be used.
        edge_types : dict[str, type[BaseModel]] | None
            Optional. Dictionary mapping edge type names to their Pydantic model definitions.
        edge_type_map : dict[tuple[str, str], list[str]] | None
            Optional. Mapping of (source_type, target_type) pairs to allowed edge type names.
            When omitted, all provided edge_types are allowed between any entity pair.
        custom_extraction_instructions : str | None
            Optional. Custom extraction instructions string to be included in the extract entities and extract edges prompts.
            This allows for additional instructions or context to guide the extraction process.

        Returns
        -------
        AddEpisodeResults
            The saved episode plus the episodic edges, entity nodes, entity edges,
            and (when update_communities is True) community nodes and edges produced.

        Notes
        -----
        This method performs several steps including node extraction, edge extraction,
        deduplication, and database updates. It also handles embedding generation
        and edge invalidation.

        It is recommended to run this method as a background process, such as in a queue.
        It's important that each episode is added sequentially and awaited before adding
        the next one. For web applications, consider using FastAPI's background tasks
        or a dedicated task queue like Celery for this purpose.

        Example using FastAPI background tasks:
            @app.post("/add_episode")
            async def add_episode_endpoint(episode_data: EpisodeData):
                background_tasks.add_task(graphiti.add_episode, **episode_data.dict())
                return {"message": "Episode processing started"}
        """
        start = time()
        now = utc_now()

        validate_entity_types(entity_types)
        validate_excluded_entity_types(excluded_entity_types, entity_types)

        if group_id is None:
            # if group_id is None, use the default group id by the provider
            # and the preset database name will be used
            group_id = get_default_group_id(self.driver.provider)
        else:
            validate_group_id(group_id)
            if group_id != self.driver._database:
                # if group_id is provided, use it as the database name;
                # clone the driver so this instance targets that database
                self.driver = self.driver.clone(database=group_id)
                self.clients.driver = self.driver

        # All work is traced; failures are recorded on the span and re-raised.
        with self.tracer.start_span('add_episode') as span:
            try:
                # Retrieve previous episodes for context
                previous_episodes = (
                    await self.retrieve_episodes(
                        reference_time,
                        last_n=RELEVANT_SCHEMA_LIMIT,
                        group_ids=[group_id],
                        source=source,
                    )
                    if previous_episode_uuids is None
                    else await EpisodicNode.get_by_uuids(self.driver, previous_episode_uuids)
                )

                # Get or create episode
                episode = (
                    await EpisodicNode.get_by_uuid(self.driver, uuid)
                    if uuid is not None
                    else EpisodicNode(
                        name=name,
                        group_id=group_id,
                        labels=[],
                        source=source,
                        content=episode_body,
                        source_description=source_description,
                        created_at=now,
                        valid_at=reference_time,
                    )
                )

                # Create default edge type map: allow every provided edge type
                # between any pair of entities
                edge_type_map_default = (
                    {('Entity', 'Entity'): list(edge_types.keys())}
                    if edge_types is not None
                    else {('Entity', 'Entity'): []}
                )

                # Extract and resolve nodes
                extracted_nodes = await extract_nodes(
                    self.clients,
                    episode,
                    previous_episodes,
                    entity_types,
                    excluded_entity_types,
                    custom_extraction_instructions,
                )

                nodes, uuid_map, _ = await resolve_extracted_nodes(
                    self.clients,
                    extracted_nodes,
                    episode,
                    previous_episodes,
                    entity_types,
                )

                # Extract and resolve edges against the existing graph
                resolved_edges, invalidated_edges = await self._extract_and_resolve_edges(
                    episode,
                    extracted_nodes,
                    previous_episodes,
                    edge_type_map or edge_type_map_default,
                    group_id,
                    edge_types,
                    nodes,
                    uuid_map,
                    custom_extraction_instructions,
                )

                # Extract node attributes
                hydrated_nodes = await extract_attributes_from_nodes(
                    self.clients, nodes, episode, previous_episodes, entity_types
                )

                entity_edges = resolved_edges + invalidated_edges

                # Process and save episode data
                episodic_edges, episode = await self._process_episode_data(
                    episode, hydrated_nodes, entity_edges, now
                )

                # Update communities if requested
                communities = []
                community_edges = []
                if update_communities:
                    # NOTE(review): this unpacks the gathered per-node results into
                    # exactly two names, which only works when the gather yields two
                    # items — confirm update_community's return shape and whether a
                    # zip/flatten is intended here.
                    communities, community_edges = await semaphore_gather(
                        *[
                            update_community(self.driver, self.llm_client, self.embedder, node)
                            for node in nodes
                        ],
                        max_coroutines=self.max_coroutines,
                    )

                end = time()

                # Add span attributes
                span.add_attributes(
                    {
                        'episode.uuid': episode.uuid,
                        'episode.source': source.value,
                        'episode.reference_time': reference_time.isoformat(),
                        'group_id': group_id,
                        'node.count': len(hydrated_nodes),
                        'edge.count': len(entity_edges),
                        'edge.invalidated_count': len(invalidated_edges),
                        'previous_episodes.count': len(previous_episodes),
                        'entity_types.count': len(entity_types) if entity_types else 0,
                        'edge_types.count': len(edge_types) if edge_types else 0,
                        'update_communities': update_communities,
                        'communities.count': len(communities) if update_communities else 0,
                        'duration_ms': (end - start) * 1000,
                    }
                )

                logger.info(f'Completed add_episode in {(end - start) * 1000} ms')

                return AddEpisodeResults(
                    episode=episode,
                    episodic_edges=episodic_edges,
                    nodes=hydrated_nodes,
                    edges=entity_edges,
                    communities=communities,
                    community_edges=community_edges,
                )

            except Exception as e:
                span.set_status('error', str(e))
                span.record_exception(e)
                raise e
840 |
    async def add_episode_bulk(
        self,
        bulk_episodes: list[RawEpisode],
        group_id: str | None = None,
        entity_types: dict[str, type[BaseModel]] | None = None,
        excluded_entity_types: list[str] | None = None,
        edge_types: dict[str, type[BaseModel]] | None = None,
        edge_type_map: dict[tuple[str, str], list[str]] | None = None,
        custom_extraction_instructions: str | None = None,
    ) -> AddBulkEpisodeResults:
        """
        Process multiple episodes in bulk and update the graph.

        This method extracts information from multiple episodes, creates nodes and edges,
        and updates the graph database accordingly, all in a single batch operation.

        Parameters
        ----------
        bulk_episodes : list[RawEpisode]
            A list of RawEpisode objects to be processed and added to the graph.
        group_id : str | None
            An id for the graph partition the episode is a part of.
        entity_types : dict[str, type[BaseModel]] | None
            Optional. A dictionary mapping entity type names to Pydantic models.
        excluded_entity_types : list[str] | None
            Optional. A list of entity type names to exclude from extraction.
        edge_types : dict[str, type[BaseModel]] | None
            Optional. A dictionary mapping edge type names to Pydantic models.
        edge_type_map : dict[tuple[str, str], list[str]] | None
            Optional. A mapping of (source_type, target_type) to allowed edge types.
        custom_extraction_instructions : str | None
            Optional. Custom extraction instructions string to be included in the
            extract entities and extract edges prompts. This allows for additional
            instructions or context to guide the extraction process.

        Returns
        -------
        AddBulkEpisodeResults

        Notes
        -----
        This method performs several steps including:
        - Saving all episodes to the database
        - Retrieving previous episode context for each new episode
        - Extracting nodes and edges from all episodes
        - Generating embeddings for nodes and edges
        - Deduplicating nodes and edges
        - Saving nodes, episodic edges, and entity edges to the knowledge graph

        This bulk operation is designed for efficiency when processing multiple episodes
        at once. However, it's important to ensure that the bulk operation doesn't
        overwhelm system resources. Consider implementing rate limiting or chunking for
        very large batches of episodes.

        Important: This method does not perform edge invalidation or date extraction steps.
        If these operations are required, use the `add_episode` method instead for each
        individual episode.
        """
        # The whole batch runs inside one tracing span; failures are recorded
        # on the span and re-raised.
        with self.tracer.start_span('add_episode_bulk') as bulk_span:
            bulk_span.add_attributes({'episode.count': len(bulk_episodes)})

            try:
                start = time()
                now = utc_now()

                # if group_id is None, use the default group id by the provider
                if group_id is None:
                    group_id = get_default_group_id(self.driver.provider)
                else:
                    validate_group_id(group_id)
                    if group_id != self.driver._database:
                        # if group_id is provided, use it as the database name;
                        # clone the driver so this instance targets that database
                        self.driver = self.driver.clone(database=group_id)
                        self.clients.driver = self.driver

                # Create default edge type map: allow every provided edge type
                # between any pair of entities
                edge_type_map_default = (
                    {('Entity', 'Entity'): list(edge_types.keys())}
                    if edge_types is not None
                    else {('Entity', 'Entity'): []}
                )

                # Load existing episodes by uuid when given, otherwise build new ones
                episodes = [
                    await EpisodicNode.get_by_uuid(self.driver, episode.uuid)
                    if episode.uuid is not None
                    else EpisodicNode(
                        name=episode.name,
                        labels=[],
                        source=episode.source,
                        content=episode.content,
                        source_description=episode.source_description,
                        group_id=group_id,
                        created_at=now,
                        valid_at=episode.reference_time,
                    )
                    for episode in bulk_episodes
                ]

                # Save all episodes
                await add_nodes_and_edges_bulk(
                    driver=self.driver,
                    episodic_nodes=episodes,
                    episodic_edges=[],
                    entity_nodes=[],
                    entity_edges=[],
                    embedder=self.embedder,
                )

                # Get previous episode context for each episode
                episode_context = await retrieve_previous_episodes_bulk(self.driver, episodes)

                # Extract and dedupe nodes and edges
                (
                    nodes_by_episode,
                    uuid_map,
                    extracted_edges_bulk,
                ) = await self._extract_and_dedupe_nodes_bulk(
                    episode_context,
                    edge_type_map or edge_type_map_default,
                    edge_types,
                    entity_types,
                    excluded_entity_types,
                    custom_extraction_instructions,
                )

                # Create Episodic Edges
                episodic_edges: list[EpisodicEdge] = []
                for episode_uuid, nodes in nodes_by_episode.items():
                    episodic_edges.extend(build_episodic_edges(nodes, episode_uuid, now))

                # Re-map edge pointers and dedupe edges
                extracted_edges_bulk_updated: list[list[EntityEdge]] = [
                    resolve_edge_pointers(edges, uuid_map) for edges in extracted_edges_bulk
                ]

                # NOTE(review): the fourth argument is a bare empty list —
                # presumably an existing-edges/context parameter of
                # dedupe_edges_bulk; confirm against its signature.
                edges_by_episode = await dedupe_edges_bulk(
                    self.clients,
                    extracted_edges_bulk_updated,
                    episode_context,
                    [],
                    edge_types or {},
                    edge_type_map or edge_type_map_default,
                )

                # Resolve nodes and edges against the existing graph
                (
                    final_hydrated_nodes,
                    resolved_edges,
                    invalidated_edges,
                    final_uuid_map,
                ) = await self._resolve_nodes_and_edges_bulk(
                    nodes_by_episode,
                    edges_by_episode,
                    episode_context,
                    entity_types,
                    edge_types,
                    edge_type_map or edge_type_map_default,
                    episodes,
                )

                # Resolved pointers for episodic edges
                resolved_episodic_edges = resolve_edge_pointers(episodic_edges, final_uuid_map)

                # save data to KG
                await add_nodes_and_edges_bulk(
                    self.driver,
                    episodes,
                    resolved_episodic_edges,
                    final_hydrated_nodes,
                    resolved_edges + invalidated_edges,
                    self.embedder,
                )

                end = time()

                # Add span attributes
                bulk_span.add_attributes(
                    {
                        'group_id': group_id,
                        'node.count': len(final_hydrated_nodes),
                        'edge.count': len(resolved_edges + invalidated_edges),
                        'duration_ms': (end - start) * 1000,
                    }
                )

                logger.info(f'Completed add_episode_bulk in {(end - start) * 1000} ms')

                return AddBulkEpisodeResults(
                    episodes=episodes,
                    episodic_edges=resolved_episodic_edges,
                    nodes=final_hydrated_nodes,
                    edges=resolved_edges + invalidated_edges,
                    communities=[],
                    community_edges=[],
                )

            except Exception as e:
                bulk_span.set_status('error', str(e))
                bulk_span.record_exception(e)
                raise e
1041 |
1042 | @handle_multiple_group_ids
1043 | async def build_communities(
1044 | self, group_ids: list[str] | None = None, driver: GraphDriver | None = None
1045 | ) -> tuple[list[CommunityNode], list[CommunityEdge]]:
1046 | """
1047 | Use a community clustering algorithm to find communities of nodes. Create community nodes summarising
1048 | the content of these communities.
1049 | ----------
1050 | group_ids : list[str] | None
1051 | Optional. Create communities only for the listed group_ids. If blank the entire graph will be used.
1052 | """
1053 | if driver is None:
1054 | driver = self.clients.driver
1055 |
1056 | # Clear existing communities
1057 | await remove_communities(driver)
1058 |
1059 | community_nodes, community_edges = await build_communities(
1060 | driver, self.llm_client, group_ids
1061 | )
1062 |
1063 | await semaphore_gather(
1064 | *[node.generate_name_embedding(self.embedder) for node in community_nodes],
1065 | max_coroutines=self.max_coroutines,
1066 | )
1067 |
1068 | await semaphore_gather(
1069 | *[node.save(driver) for node in community_nodes],
1070 | max_coroutines=self.max_coroutines,
1071 | )
1072 | await semaphore_gather(
1073 | *[edge.save(driver) for edge in community_edges],
1074 | max_coroutines=self.max_coroutines,
1075 | )
1076 |
1077 | return community_nodes, community_edges
1078 |
1079 | @handle_multiple_group_ids
1080 | async def search(
1081 | self,
1082 | query: str,
1083 | center_node_uuid: str | None = None,
1084 | group_ids: list[str] | None = None,
1085 | num_results=DEFAULT_SEARCH_LIMIT,
1086 | search_filter: SearchFilters | None = None,
1087 | driver: GraphDriver | None = None,
1088 | ) -> list[EntityEdge]:
1089 | """
1090 | Perform a hybrid search on the knowledge graph.
1091 |
1092 | This method executes a search query on the graph, combining vector and
1093 | text-based search techniques to retrieve relevant facts, returning the edges as a string.
1094 |
1095 | This is our basic out-of-the-box search, for more robust results we recommend using our more advanced
1096 | search method graphiti.search_().
1097 |
1098 | Parameters
1099 | ----------
1100 | query : str
1101 | The search query string.
1102 | center_node_uuid: str, optional
1103 | Facts will be reranked based on proximity to this node
1104 | group_ids : list[str | None] | None, optional
1105 | The graph partitions to return data from.
1106 | num_results : int, optional
1107 | The maximum number of results to return. Defaults to 10.
1108 |
1109 | Returns
1110 | -------
1111 | list
1112 | A list of EntityEdge objects that are relevant to the search query.
1113 |
1114 | Notes
1115 | -----
1116 | This method uses a SearchConfig with num_episodes set to 0 and
1117 | num_results set to the provided num_results parameter.
1118 |
1119 | The search is performed using the current date and time as the reference
1120 | point for temporal relevance.
1121 | """
1122 | search_config = (
1123 | EDGE_HYBRID_SEARCH_RRF if center_node_uuid is None else EDGE_HYBRID_SEARCH_NODE_DISTANCE
1124 | )
1125 | search_config.limit = num_results
1126 |
1127 | edges = (
1128 | await search(
1129 | self.clients,
1130 | query,
1131 | group_ids,
1132 | search_config,
1133 | search_filter if search_filter is not None else SearchFilters(),
1134 | driver=driver,
1135 | center_node_uuid=center_node_uuid,
1136 | )
1137 | ).edges
1138 |
1139 | return edges
1140 |
1141 | async def _search(
1142 | self,
1143 | query: str,
1144 | config: SearchConfig,
1145 | group_ids: list[str] | None = None,
1146 | center_node_uuid: str | None = None,
1147 | bfs_origin_node_uuids: list[str] | None = None,
1148 | search_filter: SearchFilters | None = None,
1149 | ) -> SearchResults:
1150 | """DEPRECATED"""
1151 | return await self.search_(
1152 | query, config, group_ids, center_node_uuid, bfs_origin_node_uuids, search_filter
1153 | )
1154 |
1155 | @handle_multiple_group_ids
1156 | async def search_(
1157 | self,
1158 | query: str,
1159 | config: SearchConfig = COMBINED_HYBRID_SEARCH_CROSS_ENCODER,
1160 | group_ids: list[str] | None = None,
1161 | center_node_uuid: str | None = None,
1162 | bfs_origin_node_uuids: list[str] | None = None,
1163 | search_filter: SearchFilters | None = None,
1164 | driver: GraphDriver | None = None,
1165 | ) -> SearchResults:
1166 | """search_ (replaces _search) is our advanced search method that returns Graph objects (nodes and edges) rather
1167 | than a list of facts. This endpoint allows the end user to utilize more advanced features such as filters and
1168 | different search and reranker methodologies across different layers in the graph.
1169 |
1170 | For different config recipes refer to search/search_config_recipes.
1171 | """
1172 |
1173 | return await search(
1174 | self.clients,
1175 | query,
1176 | group_ids,
1177 | config,
1178 | search_filter if search_filter is not None else SearchFilters(),
1179 | center_node_uuid,
1180 | bfs_origin_node_uuids,
1181 | driver=driver,
1182 | )
1183 |
1184 | async def get_nodes_and_edges_by_episode(self, episode_uuids: list[str]) -> SearchResults:
1185 | episodes = await EpisodicNode.get_by_uuids(self.driver, episode_uuids)
1186 |
1187 | edges_list = await semaphore_gather(
1188 | *[EntityEdge.get_by_uuids(self.driver, episode.entity_edges) for episode in episodes],
1189 | max_coroutines=self.max_coroutines,
1190 | )
1191 |
1192 | edges: list[EntityEdge] = [edge for lst in edges_list for edge in lst]
1193 |
1194 | nodes = await get_mentioned_nodes(self.driver, episodes)
1195 |
1196 | return SearchResults(edges=edges, nodes=nodes)
1197 |
    async def add_triplet(
        self, source_node: EntityNode, edge: EntityEdge, target_node: EntityNode
    ) -> AddTripletResults:
        """Add a single (source)-[edge]->(target) triplet to the graph.

        Each endpoint is reused if its uuid already exists, otherwise deduplicated
        against the graph; the edge is then resolved against existing edges (which
        may invalidate some of them) and everything is persisted with embeddings.

        Returns:
            AddTripletResults containing the persisted nodes and edges (the
            resolved edge plus any edges invalidated by it).
        """
        # Ensure embeddings exist before any similarity-based resolution below.
        if source_node.name_embedding is None:
            await source_node.generate_name_embedding(self.embedder)
        if target_node.name_embedding is None:
            await target_node.generate_name_embedding(self.embedder)
        if edge.fact_embedding is None:
            await edge.generate_embedding(self.embedder)

        # Resolve the source endpoint: prefer the stored node with this uuid,
        # falling back to entity resolution when it does not exist yet.
        try:
            resolved_source = await EntityNode.get_by_uuid(self.driver, source_node.uuid)
        except NodeNotFoundError:
            resolved_source_nodes, _, _ = await resolve_extracted_nodes(
                self.clients,
                [source_node],
            )
            resolved_source = resolved_source_nodes[0]

        # Same resolution for the target endpoint.
        try:
            resolved_target = await EntityNode.get_by_uuid(self.driver, target_node.uuid)
        except NodeNotFoundError:
            resolved_target_nodes, _, _ = await resolve_extracted_nodes(
                self.clients,
                [target_node],
            )
            resolved_target = resolved_target_nodes[0]

        nodes = [resolved_source, resolved_target]

        # Merge user-provided properties from original nodes into resolved nodes (excluding uuid)
        # Update attributes dictionary (merge rather than replace)
        if source_node.attributes:
            resolved_source.attributes.update(source_node.attributes)
        if target_node.attributes:
            resolved_target.attributes.update(target_node.attributes)

        # Update summary if provided by user (non-empty string)
        if source_node.summary:
            resolved_source.summary = source_node.summary
        if target_node.summary:
            resolved_target.summary = target_node.summary

        # Update labels (merge with existing)
        if source_node.labels:
            resolved_source.labels = list(set(resolved_source.labels) | set(source_node.labels))
        if target_node.labels:
            resolved_target.labels = list(set(resolved_target.labels) | set(target_node.labels))

        # Re-point the edge at the resolved (possibly deduplicated) endpoints.
        edge.source_node_uuid = resolved_source.uuid
        edge.target_node_uuid = resolved_target.uuid

        # Edges already connecting the two resolved endpoints.
        valid_edges = await EntityEdge.get_between_nodes(
            self.driver, edge.source_node_uuid, edge.target_node_uuid
        )

        # related_edges: hybrid search restricted to edges between the endpoints.
        related_edges = (
            await search(
                self.clients,
                edge.fact,
                group_ids=[edge.group_id],
                config=EDGE_HYBRID_SEARCH_RRF,
                search_filter=SearchFilters(edge_uuids=[edge.uuid for edge in valid_edges]),
            )
        ).edges
        # existing_edges: same search without the endpoint restriction.
        existing_edges = (
            await search(
                self.clients,
                edge.fact,
                group_ids=[edge.group_id],
                config=EDGE_HYBRID_SEARCH_RRF,
                search_filter=SearchFilters(),
            )
        ).edges

        # Resolve the new edge against the candidates; a synthetic empty episode
        # carries the temporal context (valid_at falls back to "now").
        resolved_edge, invalidated_edges, _ = await resolve_extracted_edge(
            self.llm_client,
            edge,
            related_edges,
            existing_edges,
            EpisodicNode(
                name='',
                source=EpisodeType.text,
                source_description='',
                content='',
                valid_at=edge.valid_at or utc_now(),
                entity_edges=[],
                group_id=edge.group_id,
            ),
            None,
            None,
        )

        edges: list[EntityEdge] = [resolved_edge] + invalidated_edges

        # Fill in any missing embeddings before the bulk write.
        await create_entity_edge_embeddings(self.embedder, edges)
        await create_entity_node_embeddings(self.embedder, nodes)

        await add_nodes_and_edges_bulk(self.driver, [], [], nodes, edges, self.embedder)
        return AddTripletResults(edges=edges, nodes=nodes)
1298 |
1299 | async def remove_episode(self, episode_uuid: str):
1300 | # Find the episode to be deleted
1301 | episode = await EpisodicNode.get_by_uuid(self.driver, episode_uuid)
1302 |
1303 | # Find edges mentioned by the episode
1304 | edges = await EntityEdge.get_by_uuids(self.driver, episode.entity_edges)
1305 |
1306 | # We should only delete edges created by the episode
1307 | edges_to_delete: list[EntityEdge] = []
1308 | for edge in edges:
1309 | if edge.episodes and edge.episodes[0] == episode.uuid:
1310 | edges_to_delete.append(edge)
1311 |
1312 | # Find nodes mentioned by the episode
1313 | nodes = await get_mentioned_nodes(self.driver, [episode])
1314 | # We should delete all nodes that are only mentioned in the deleted episode
1315 | nodes_to_delete: list[EntityNode] = []
1316 | for node in nodes:
1317 | query: LiteralString = 'MATCH (e:Episodic)-[:MENTIONS]->(n:Entity {uuid: $uuid}) RETURN count(*) AS episode_count'
1318 | records, _, _ = await self.driver.execute_query(query, uuid=node.uuid, routing_='r')
1319 |
1320 | for record in records:
1321 | if record['episode_count'] == 1:
1322 | nodes_to_delete.append(node)
1323 |
1324 | await Edge.delete_by_uuids(self.driver, [edge.uuid for edge in edges_to_delete])
1325 | await Node.delete_by_uuids(self.driver, [node.uuid for node in nodes_to_delete])
1326 |
1327 | await episode.delete(self.driver)
1328 |
```
--------------------------------------------------------------------------------
/tests/test_graphiti_mock.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Copyright 2024, Zep Software, Inc.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 |
17 | from datetime import datetime, timedelta
18 | from unittest.mock import Mock
19 |
20 | import numpy as np
21 | import pytest
22 |
23 | from graphiti_core.cross_encoder.client import CrossEncoderClient
24 | from graphiti_core.edges import CommunityEdge, EntityEdge, EpisodicEdge
25 | from graphiti_core.graphiti import Graphiti
26 | from graphiti_core.llm_client import LLMClient
27 | from graphiti_core.nodes import CommunityNode, EntityNode, EpisodeType, EpisodicNode
28 | from graphiti_core.search.search_filters import ComparisonOperator, DateFilter, SearchFilters
29 | from graphiti_core.search.search_utils import (
30 | community_fulltext_search,
31 | community_similarity_search,
32 | edge_bfs_search,
33 | edge_fulltext_search,
34 | edge_similarity_search,
35 | episode_fulltext_search,
36 | episode_mentions_reranker,
37 | get_communities_by_nodes,
38 | get_edge_invalidation_candidates,
39 | get_embeddings_for_communities,
40 | get_embeddings_for_edges,
41 | get_embeddings_for_nodes,
42 | get_mentioned_nodes,
43 | get_relevant_edges,
44 | get_relevant_nodes,
45 | node_bfs_search,
46 | node_distance_reranker,
47 | node_fulltext_search,
48 | node_similarity_search,
49 | )
50 | from graphiti_core.utils.bulk_utils import add_nodes_and_edges_bulk
51 | from graphiti_core.utils.maintenance.community_operations import (
52 | determine_entity_community,
53 | get_community_clusters,
54 | remove_communities,
55 | )
56 | from graphiti_core.utils.maintenance.edge_operations import filter_existing_duplicate_of_edges
57 | from tests.helpers_test import (
58 | GraphProvider,
59 | assert_entity_edge_equals,
60 | assert_entity_node_equals,
61 | assert_episodic_edge_equals,
62 | assert_episodic_node_equals,
63 | get_edge_count,
64 | get_node_count,
65 | group_id,
66 | group_id_2,
67 | )
68 |
69 | pytest_plugins = ('pytest_asyncio',)
70 |
71 |
@pytest.fixture
def mock_llm_client():
    """Create a mock LLM client preloaded with a canned extraction response."""
    client = Mock(spec=LLMClient)
    client.config = Mock()
    client.model = 'test-model'
    client.small_model = 'test-small-model'
    client.temperature = 0.0
    client.max_tokens = 1000
    client.cache_enabled = False
    client.cache_dir = None

    # Stub the public entry point the code under test actually calls.
    canned_response = {
        'tool_calls': [
            {
                'name': 'extract_entities',
                'arguments': {'entities': [{'entity': 'test_entity', 'entity_type': 'test_type'}]},
            }
        ]
    }
    client.generate_response = Mock(return_value=canned_response)

    return client
96 |
97 |
@pytest.fixture
def mock_cross_encoder_client():
    """Create a mock cross-encoder (reranker) client.

    Fixes the copy-pasted docstring/naming from the LLM fixture: this mocks
    CrossEncoderClient, not an LLM.
    """
    mock_cross_encoder = Mock(spec=CrossEncoderClient)
    mock_cross_encoder.config = Mock()

    # Mock the public method that's actually called.
    # NOTE(review): this canned value mirrors the LLM fixture's tool-call payload;
    # a real reranker would return scored passages. Kept as-is since no test in
    # this module appears to consume the rerank result — confirm before changing.
    mock_cross_encoder.rerank = Mock()
    mock_cross_encoder.rerank.return_value = {
        'tool_calls': [
            {
                'name': 'extract_entities',
                'arguments': {'entities': [{'entity': 'test_entity', 'entity_type': 'test_type'}]},
            }
        ]
    }

    return mock_cross_encoder
116 |
117 |
@pytest.mark.asyncio
async def test_add_bulk(graph_driver, mock_llm_client, mock_embedder, mock_cross_encoder_client):
    """Bulk-insert two episodes, four entities, and their edges, then verify
    every node and edge round-trips from the store by uuid."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as test fails on FalkorDB')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )

    await graphiti.build_indices_and_constraints()

    now = datetime.now()

    # Create episodic nodes
    episode_node_1 = EpisodicNode(
        name='test_episode',
        group_id=group_id,
        labels=[],
        created_at=now,
        source=EpisodeType.message,
        source_description='conversation message',
        content='Alice likes Bob',
        valid_at=now,
        entity_edges=[],  # Filled in later
    )
    episode_node_2 = EpisodicNode(
        name='test_episode_2',
        group_id=group_id,
        labels=[],
        created_at=now,
        source=EpisodeType.message,
        source_description='conversation message',
        content='Bob adores Alice',
        valid_at=now,
        entity_edges=[],  # Filled in later
    )

    # Create entity nodes
    entity_node_1 = EntityNode(
        name='test_entity_1',
        group_id=group_id,
        labels=['Entity', 'Person'],
        created_at=now,
        summary='test_entity_1 summary',
        attributes={'age': 30, 'location': 'New York'},
    )
    await entity_node_1.generate_name_embedding(mock_embedder)

    entity_node_2 = EntityNode(
        name='test_entity_2',
        group_id=group_id,
        labels=['Entity', 'Person2'],
        created_at=now,
        summary='test_entity_2 summary',
        attributes={'age': 25, 'location': 'Los Angeles'},
    )
    await entity_node_2.generate_name_embedding(mock_embedder)

    entity_node_3 = EntityNode(
        name='test_entity_3',
        group_id=group_id,
        labels=['Entity', 'City', 'Location'],
        created_at=now,
        summary='test_entity_3 summary',
        attributes={'age': 25, 'location': 'Los Angeles'},
    )
    await entity_node_3.generate_name_embedding(mock_embedder)

    entity_node_4 = EntityNode(
        name='test_entity_4',
        group_id=group_id,
        labels=['Entity'],
        created_at=now,
        summary='test_entity_4 summary',
        attributes={'age': 25, 'location': 'Los Angeles'},
    )
    await entity_node_4.generate_name_embedding(mock_embedder)

    # Create entity edges
    entity_edge_1 = EntityEdge(
        source_node_uuid=entity_node_1.uuid,
        target_node_uuid=entity_node_2.uuid,
        created_at=now,
        name='likes',
        fact='test_entity_1 relates to test_entity_2',
        episodes=[],
        expired_at=now,
        valid_at=now,
        invalid_at=now,
        group_id=group_id,
    )
    await entity_edge_1.generate_embedding(mock_embedder)

    entity_edge_2 = EntityEdge(
        source_node_uuid=entity_node_3.uuid,
        target_node_uuid=entity_node_4.uuid,
        created_at=now,
        name='relates_to',
        fact='test_entity_3 relates to test_entity_4',
        episodes=[],
        expired_at=now,
        valid_at=now,
        invalid_at=now,
        group_id=group_id,
    )
    await entity_edge_2.generate_embedding(mock_embedder)

    # Create episodic to entity edges
    episodic_edge_1 = EpisodicEdge(
        source_node_uuid=episode_node_1.uuid,
        target_node_uuid=entity_node_1.uuid,
        created_at=now,
        group_id=group_id,
    )
    episodic_edge_2 = EpisodicEdge(
        source_node_uuid=episode_node_1.uuid,
        target_node_uuid=entity_node_2.uuid,
        created_at=now,
        group_id=group_id,
    )
    episodic_edge_3 = EpisodicEdge(
        source_node_uuid=episode_node_2.uuid,
        target_node_uuid=entity_node_3.uuid,
        created_at=now,
        group_id=group_id,
    )
    episodic_edge_4 = EpisodicEdge(
        source_node_uuid=episode_node_2.uuid,
        target_node_uuid=entity_node_4.uuid,
        created_at=now,
        group_id=group_id,
    )

    # Cross reference the ids
    episode_node_1.entity_edges = [entity_edge_1.uuid]
    episode_node_2.entity_edges = [entity_edge_2.uuid]
    entity_edge_1.episodes = [episode_node_1.uuid, episode_node_2.uuid]
    entity_edge_2.episodes = [episode_node_2.uuid]

    # Test add bulk
    await add_nodes_and_edges_bulk(
        graph_driver,
        [episode_node_1, episode_node_2],
        [episodic_edge_1, episodic_edge_2, episodic_edge_3, episodic_edge_4],
        [entity_node_1, entity_node_2, entity_node_3, entity_node_4],
        [entity_edge_1, entity_edge_2],
        mock_embedder,
    )

    node_ids = [
        episode_node_1.uuid,
        episode_node_2.uuid,
        entity_node_1.uuid,
        entity_node_2.uuid,
        entity_node_3.uuid,
        entity_node_4.uuid,
    ]
    edge_ids = [
        episodic_edge_1.uuid,
        episodic_edge_2.uuid,
        episodic_edge_3.uuid,
        episodic_edge_4.uuid,
        entity_edge_1.uuid,
        entity_edge_2.uuid,
    ]
    # Every node and edge must have been persisted by the bulk write.
    node_count = await get_node_count(graph_driver, node_ids)
    assert node_count == len(node_ids)
    edge_count = await get_edge_count(graph_driver, edge_ids)
    assert edge_count == len(edge_ids)

    # Test episodic nodes
    retrieved_episode = await EpisodicNode.get_by_uuid(graph_driver, episode_node_1.uuid)
    await assert_episodic_node_equals(retrieved_episode, episode_node_1)

    retrieved_episode = await EpisodicNode.get_by_uuid(graph_driver, episode_node_2.uuid)
    await assert_episodic_node_equals(retrieved_episode, episode_node_2)

    # Test entity nodes
    retrieved_entity_node = await EntityNode.get_by_uuid(graph_driver, entity_node_1.uuid)
    await assert_entity_node_equals(graph_driver, retrieved_entity_node, entity_node_1)

    retrieved_entity_node = await EntityNode.get_by_uuid(graph_driver, entity_node_2.uuid)
    await assert_entity_node_equals(graph_driver, retrieved_entity_node, entity_node_2)

    retrieved_entity_node = await EntityNode.get_by_uuid(graph_driver, entity_node_3.uuid)
    await assert_entity_node_equals(graph_driver, retrieved_entity_node, entity_node_3)

    retrieved_entity_node = await EntityNode.get_by_uuid(graph_driver, entity_node_4.uuid)
    await assert_entity_node_equals(graph_driver, retrieved_entity_node, entity_node_4)

    # Test episodic edges
    retrieved_episode_edge = await EpisodicEdge.get_by_uuid(graph_driver, episodic_edge_1.uuid)
    await assert_episodic_edge_equals(retrieved_episode_edge, episodic_edge_1)

    retrieved_episode_edge = await EpisodicEdge.get_by_uuid(graph_driver, episodic_edge_2.uuid)
    await assert_episodic_edge_equals(retrieved_episode_edge, episodic_edge_2)

    retrieved_episode_edge = await EpisodicEdge.get_by_uuid(graph_driver, episodic_edge_3.uuid)
    await assert_episodic_edge_equals(retrieved_episode_edge, episodic_edge_3)

    retrieved_episode_edge = await EpisodicEdge.get_by_uuid(graph_driver, episodic_edge_4.uuid)
    await assert_episodic_edge_equals(retrieved_episode_edge, episodic_edge_4)

    # Test entity edges
    retrieved_entity_edge = await EntityEdge.get_by_uuid(graph_driver, entity_edge_1.uuid)
    await assert_entity_edge_equals(graph_driver, retrieved_entity_edge, entity_edge_1)

    retrieved_entity_edge = await EntityEdge.get_by_uuid(graph_driver, entity_edge_2.uuid)
    await assert_entity_edge_equals(graph_driver, retrieved_entity_edge, entity_edge_2)
330 |
331 |
@pytest.mark.asyncio
async def test_remove_episode(
    graph_driver, mock_llm_client, mock_embedder, mock_cross_encoder_client
):
    """remove_episode deletes the episode plus the edges/nodes only it created,
    and the same payload can be bulk-inserted again afterwards."""
    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )

    await graphiti.build_indices_and_constraints()

    now = datetime.now()

    # Create episodic nodes
    episode_node = EpisodicNode(
        name='test_episode',
        group_id=group_id,
        labels=[],
        created_at=now,
        source=EpisodeType.message,
        source_description='conversation message',
        content='Alice likes Bob',
        valid_at=now,
        entity_edges=[],  # Filled in later
    )

    # Create entity nodes
    alice_node = EntityNode(
        name='Alice',
        group_id=group_id,
        labels=['Entity', 'Person'],
        created_at=now,
        summary='Alice summary',
        attributes={'age': 30, 'location': 'New York'},
    )
    await alice_node.generate_name_embedding(mock_embedder)

    bob_node = EntityNode(
        name='Bob',
        group_id=group_id,
        labels=['Entity', 'Person2'],
        created_at=now,
        summary='Bob summary',
        attributes={'age': 25, 'location': 'Los Angeles'},
    )
    await bob_node.generate_name_embedding(mock_embedder)

    # Create entity to entity edge
    entity_edge = EntityEdge(
        source_node_uuid=alice_node.uuid,
        target_node_uuid=bob_node.uuid,
        created_at=now,
        name='likes',
        fact='Alice likes Bob',
        episodes=[],
        expired_at=now,
        valid_at=now,
        invalid_at=now,
        group_id=group_id,
    )
    await entity_edge.generate_embedding(mock_embedder)

    # Create episodic to entity edges
    episodic_alice_edge = EpisodicEdge(
        source_node_uuid=episode_node.uuid,
        target_node_uuid=alice_node.uuid,
        created_at=now,
        group_id=group_id,
    )
    episodic_bob_edge = EpisodicEdge(
        source_node_uuid=episode_node.uuid,
        target_node_uuid=bob_node.uuid,
        created_at=now,
        group_id=group_id,
    )

    # Cross reference the ids: the episode created the edge, so it is the
    # edge's first (and only) episode — making it eligible for deletion.
    episode_node.entity_edges = [entity_edge.uuid]
    entity_edge.episodes = [episode_node.uuid]

    # Test add bulk
    await add_nodes_and_edges_bulk(
        graph_driver,
        [episode_node],
        [episodic_alice_edge, episodic_bob_edge],
        [alice_node, bob_node],
        [entity_edge],
        mock_embedder,
    )

    node_ids = [episode_node.uuid, alice_node.uuid, bob_node.uuid]
    edge_ids = [episodic_alice_edge.uuid, episodic_bob_edge.uuid, entity_edge.uuid]
    node_count = await get_node_count(graph_driver, node_ids)
    assert node_count == 3
    edge_count = await get_edge_count(graph_driver, edge_ids)
    assert edge_count == 3

    # Test remove episode: everything was introduced by this single episode,
    # so the whole subgraph should disappear.
    await graphiti.remove_episode(episode_node.uuid)
    node_count = await get_node_count(graph_driver, node_ids)
    assert node_count == 0
    edge_count = await get_edge_count(graph_driver, edge_ids)
    assert edge_count == 0

    # Test add bulk again — the removal must leave the store re-insertable.
    await add_nodes_and_edges_bulk(
        graph_driver,
        [episode_node],
        [episodic_alice_edge, episodic_bob_edge],
        [alice_node, bob_node],
        [entity_edge],
        mock_embedder,
    )
    node_count = await get_node_count(graph_driver, node_ids)
    assert node_count == 3
    edge_count = await get_edge_count(graph_driver, edge_ids)
    assert edge_count == 3
451 |
452 |
@pytest.mark.asyncio
async def test_graphiti_retrieve_episodes(
    graph_driver, mock_llm_client, mock_embedder, mock_cross_encoder_client
):
    """retrieve_episodes returns only episodes with valid_at at or before the
    reference time (oldest first, per the assertions below)."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as test fails on FalkorDB')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )

    await graphiti.build_indices_and_constraints()

    now = datetime.now()
    # Three episodes spaced 2 days apart; the query cutoff lands between them.
    valid_at_1 = now - timedelta(days=2)
    valid_at_2 = now - timedelta(days=4)
    valid_at_3 = now - timedelta(days=6)

    # Create episodic nodes
    episode_node_1 = EpisodicNode(
        name='test_episode_1',
        labels=[],
        created_at=now,
        valid_at=valid_at_1,
        source=EpisodeType.message,
        source_description='conversation message',
        content='Test message 1',
        entity_edges=[],
        group_id=group_id,
    )
    episode_node_2 = EpisodicNode(
        name='test_episode_2',
        labels=[],
        created_at=now,
        valid_at=valid_at_2,
        source=EpisodeType.message,
        source_description='conversation message',
        content='Test message 2',
        entity_edges=[],
        group_id=group_id,
    )
    episode_node_3 = EpisodicNode(
        name='test_episode_3',
        labels=[],
        created_at=now,
        valid_at=valid_at_3,
        source=EpisodeType.message,
        source_description='conversation message',
        content='Test message 3',
        entity_edges=[],
        group_id=group_id,
    )

    # Save the nodes
    await episode_node_1.save(graph_driver)
    await episode_node_2.save(graph_driver)
    await episode_node_3.save(graph_driver)

    node_ids = [episode_node_1.uuid, episode_node_2.uuid, episode_node_3.uuid]
    node_count = await get_node_count(graph_driver, node_ids)
    assert node_count == 3

    # Retrieve episodes valid before the 3-day cutoff: only episodes 2 and 3
    # qualify; episode 1 (2 days old) is newer than the reference time.
    query_time = now - timedelta(days=3)
    episodes = await graphiti.retrieve_episodes(
        query_time, last_n=5, group_ids=[group_id], source=EpisodeType.message
    )
    assert len(episodes) == 2
    assert episodes[0].name == episode_node_3.name
    assert episodes[1].name == episode_node_2.name
526 |
527 |
@pytest.mark.asyncio
async def test_filter_existing_duplicate_of_edges(graph_driver, mock_embedder):
    """Node pairs already linked by an IS_DUPLICATE_OF edge are filtered out;
    pairs without one pass through."""
    # Create entity nodes
    entity_node_1 = EntityNode(
        name='test_entity_1',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_1.generate_name_embedding(mock_embedder)
    entity_node_2 = EntityNode(
        name='test_entity_2',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_2.generate_name_embedding(mock_embedder)
    entity_node_3 = EntityNode(
        name='test_entity_3',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_3.generate_name_embedding(mock_embedder)
    entity_node_4 = EntityNode(
        name='test_entity_4',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_4.generate_name_embedding(mock_embedder)

    # Save the nodes
    await entity_node_1.save(graph_driver)
    await entity_node_2.save(graph_driver)
    await entity_node_3.save(graph_driver)
    await entity_node_4.save(graph_driver)

    node_ids = [entity_node_1.uuid, entity_node_2.uuid, entity_node_3.uuid, entity_node_4.uuid]
    node_count = await get_node_count(graph_driver, node_ids)
    assert node_count == 4

    # Create duplicate entity edge (only between nodes 1 and 2)
    entity_edge = EntityEdge(
        source_node_uuid=entity_node_1.uuid,
        target_node_uuid=entity_node_2.uuid,
        name='IS_DUPLICATE_OF',
        fact='test_entity_1 is a duplicate of test_entity_2',
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_edge.generate_embedding(mock_embedder)
    await entity_edge.save(graph_driver)

    # Filter duplicate entity edges: (1, 2) already has the edge and is dropped;
    # (3, 4) does not and survives.
    duplicate_node_tuples = [
        (entity_node_1, entity_node_2),
        (entity_node_3, entity_node_4),
    ]
    node_tuples = await filter_existing_duplicate_of_edges(graph_driver, duplicate_node_tuples)
    assert len(node_tuples) == 1
    assert [node.name for node in node_tuples[0]] == [entity_node_3.name, entity_node_4.name]
590 |
591 |
@pytest.mark.asyncio
async def test_determine_entity_community(graph_driver, mock_embedder):
    """An entity is assigned to the community most of its neighbors belong to;
    a second determination after linking is not "new"; remove_communities
    deletes the community nodes."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as test fails on FalkorDB')

    # Create entity nodes
    entity_node_1 = EntityNode(
        name='test_entity_1',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_1.generate_name_embedding(mock_embedder)
    entity_node_2 = EntityNode(
        name='test_entity_2',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_2.generate_name_embedding(mock_embedder)
    entity_node_3 = EntityNode(
        name='test_entity_3',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_3.generate_name_embedding(mock_embedder)
    entity_node_4 = EntityNode(
        name='test_entity_4',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_4.generate_name_embedding(mock_embedder)

    # Create entity edges: entity 4 is connected to entities 1, 2 and 3.
    entity_edge_1 = EntityEdge(
        source_node_uuid=entity_node_1.uuid,
        target_node_uuid=entity_node_4.uuid,
        name='RELATES_TO',
        fact='test_entity_1 relates to test_entity_4',
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_edge_1.generate_embedding(mock_embedder)
    entity_edge_2 = EntityEdge(
        source_node_uuid=entity_node_2.uuid,
        target_node_uuid=entity_node_4.uuid,
        name='RELATES_TO',
        fact='test_entity_2 relates to test_entity_4',
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_edge_2.generate_embedding(mock_embedder)
    entity_edge_3 = EntityEdge(
        source_node_uuid=entity_node_3.uuid,
        target_node_uuid=entity_node_4.uuid,
        name='RELATES_TO',
        fact='test_entity_3 relates to test_entity_4',
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_edge_3.generate_embedding(mock_embedder)

    # Create community nodes
    community_node_1 = CommunityNode(
        name='test_community_1',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await community_node_1.generate_name_embedding(mock_embedder)
    community_node_2 = CommunityNode(
        name='test_community_2',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await community_node_2.generate_name_embedding(mock_embedder)

    # Create community to entity edges: community 1 holds entities 1 and 2,
    # community 2 holds entity 3 — so entity 4's majority neighbor community is 1.
    community_edge_1 = CommunityEdge(
        source_node_uuid=community_node_1.uuid,
        target_node_uuid=entity_node_1.uuid,
        created_at=datetime.now(),
        group_id=group_id,
    )
    community_edge_2 = CommunityEdge(
        source_node_uuid=community_node_1.uuid,
        target_node_uuid=entity_node_2.uuid,
        created_at=datetime.now(),
        group_id=group_id,
    )
    community_edge_3 = CommunityEdge(
        source_node_uuid=community_node_2.uuid,
        target_node_uuid=entity_node_3.uuid,
        created_at=datetime.now(),
        group_id=group_id,
    )

    # Save the graph
    await entity_node_1.save(graph_driver)
    await entity_node_2.save(graph_driver)
    await entity_node_3.save(graph_driver)
    await entity_node_4.save(graph_driver)
    await community_node_1.save(graph_driver)
    await community_node_2.save(graph_driver)

    await entity_edge_1.save(graph_driver)
    await entity_edge_2.save(graph_driver)
    await entity_edge_3.save(graph_driver)
    await community_edge_1.save(graph_driver)
    await community_edge_2.save(graph_driver)
    await community_edge_3.save(graph_driver)

    node_ids = [
        entity_node_1.uuid,
        entity_node_2.uuid,
        entity_node_3.uuid,
        entity_node_4.uuid,
        community_node_1.uuid,
        community_node_2.uuid,
    ]
    edge_ids = [
        entity_edge_1.uuid,
        entity_edge_2.uuid,
        entity_edge_3.uuid,
        community_edge_1.uuid,
        community_edge_2.uuid,
        community_edge_3.uuid,
    ]
    node_count = await get_node_count(graph_driver, node_ids)
    assert node_count == 6
    edge_count = await get_edge_count(graph_driver, edge_ids)
    assert edge_count == 6

    # Determine entity community: entity 4 is unassigned, so the result is new.
    community, is_new = await determine_entity_community(graph_driver, entity_node_4)
    assert community.name == community_node_1.name
    assert is_new

    # Add entity to community edge
    community_edge_4 = CommunityEdge(
        source_node_uuid=community_node_1.uuid,
        target_node_uuid=entity_node_4.uuid,
        created_at=datetime.now(),
        group_id=group_id,
    )
    await community_edge_4.save(graph_driver)

    # Determine entity community again — now already assigned, so not new.
    community, is_new = await determine_entity_community(graph_driver, entity_node_4)
    assert community.name == community_node_1.name
    assert not is_new

    # remove_communities deletes all community nodes.
    await remove_communities(graph_driver)
    node_count = await get_node_count(graph_driver, [community_node_1.uuid, community_node_2.uuid])
    assert node_count == 0
750 |
751 |
@pytest.mark.asyncio
async def test_get_community_clusters(graph_driver, mock_embedder):
    """Connected entities are clustered together, and clusters never span
    group_ids (two connected pairs in two groups yield two clusters)."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as test fails on FalkorDB')

    # Create entity nodes — 1 and 2 in group_id, 3 and 4 in group_id_2.
    entity_node_1 = EntityNode(
        name='test_entity_1',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_1.generate_name_embedding(mock_embedder)
    entity_node_2 = EntityNode(
        name='test_entity_2',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_node_2.generate_name_embedding(mock_embedder)
    entity_node_3 = EntityNode(
        name='test_entity_3',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id_2,
    )
    await entity_node_3.generate_name_embedding(mock_embedder)
    entity_node_4 = EntityNode(
        name='test_entity_4',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id_2,
    )
    await entity_node_4.generate_name_embedding(mock_embedder)

    # Create entity edges — one edge within each group.
    entity_edge_1 = EntityEdge(
        source_node_uuid=entity_node_1.uuid,
        target_node_uuid=entity_node_2.uuid,
        name='RELATES_TO',
        fact='test_entity_1 relates to test_entity_2',
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity_edge_1.generate_embedding(mock_embedder)
    entity_edge_2 = EntityEdge(
        source_node_uuid=entity_node_3.uuid,
        target_node_uuid=entity_node_4.uuid,
        name='RELATES_TO',
        fact='test_entity_3 relates to test_entity_4',
        created_at=datetime.now(),
        group_id=group_id_2,
    )
    await entity_edge_2.generate_embedding(mock_embedder)

    # Save the graph
    await entity_node_1.save(graph_driver)
    await entity_node_2.save(graph_driver)
    await entity_node_3.save(graph_driver)
    await entity_node_4.save(graph_driver)
    await entity_edge_1.save(graph_driver)
    await entity_edge_2.save(graph_driver)

    node_ids = [entity_node_1.uuid, entity_node_2.uuid, entity_node_3.uuid, entity_node_4.uuid]
    edge_ids = [entity_edge_1.uuid, entity_edge_2.uuid]
    node_count = await get_node_count(graph_driver, node_ids)
    assert node_count == 4
    edge_count = await get_edge_count(graph_driver, edge_ids)
    assert edge_count == 2

    # Get community clusters: two clusters of two entities each, one per group.
    # Cluster ordering is not guaranteed, hence the either/or assertions.
    clusters = await get_community_clusters(graph_driver, group_ids=None)
    assert len(clusters) == 2
    assert len(clusters[0]) == 2
    assert len(clusters[1]) == 2
    entities_1 = set([node.name for node in clusters[0]])
    entities_2 = set([node.name for node in clusters[1]])
    assert entities_1 == set(['test_entity_1', 'test_entity_2']) or entities_2 == set(
        ['test_entity_1', 'test_entity_2']
    )
    assert entities_1 == set(['test_entity_3', 'test_entity_4']) or entities_2 == set(
        ['test_entity_3', 'test_entity_4']
    )
835 |
836 |
@pytest.mark.asyncio
async def test_get_mentioned_nodes(graph_driver, mock_embedder):
    """An entity linked to an episode via a MENTIONS edge is returned by
    get_mentioned_nodes."""
    # Build the minimal graph: one episode, one entity, one MENTIONS edge.
    episode = EpisodicNode(
        name='test_episodic_1',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
        source=EpisodeType.message,
        source_description='test_source_description',
        content='test_content',
        valid_at=datetime.now(),
    )
    entity = EntityNode(
        name='test_entity_1',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await entity.generate_name_embedding(mock_embedder)
    mentions_edge = EpisodicEdge(
        source_node_uuid=episode.uuid,
        target_node_uuid=entity.uuid,
        created_at=datetime.now(),
        group_id=group_id,
    )

    # Persist the graph.
    for record in (episode, entity, mentions_edge):
        await record.save(graph_driver)

    # The entity should come back as the episode's single mentioned node.
    mentioned = await get_mentioned_nodes(graph_driver, [episode])
    assert len(mentioned) == 1
    assert mentioned[0].name == entity.name
876 |
877 |
@pytest.mark.asyncio
async def test_get_communities_by_nodes(graph_driver, mock_embedder):
    """An entity attached to a community is found via get_communities_by_nodes."""
    now = datetime.now()

    member = EntityNode(
        name='test_entity_1',
        labels=[],
        created_at=now,
        group_id=group_id,
    )
    await member.generate_name_embedding(mock_embedder)

    community = CommunityNode(
        name='test_community_1',
        labels=[],
        created_at=now,
        group_id=group_id,
    )
    await community.generate_name_embedding(mock_embedder)

    # Edge from the community to its member entity.
    membership = CommunityEdge(
        source_node_uuid=community.uuid,
        target_node_uuid=member.uuid,
        created_at=now,
        group_id=group_id,
    )

    for saveable in (member, community, membership):
        await saveable.save(graph_driver)

    communities = await get_communities_by_nodes(graph_driver, [member])
    assert len(communities) == 1
    assert communities[0].name == community.name
915 |
916 |
@pytest.mark.asyncio
async def test_edge_fulltext_search(
    graph_driver, mock_embedder, mock_llm_client, mock_cross_encoder_client
):
    """Fulltext edge search finds the saved edge when every date filter admits it."""
    if graph_driver.provider == GraphProvider.KUZU:
        pytest.skip('Skipping as fulltext indexing not supported for Kuzu')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )
    await graphiti.build_indices_and_constraints()

    # Two endpoints joined by a single RELATES_TO edge.
    source_node = EntityNode(
        name='test_entity_1',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await source_node.generate_name_embedding(mock_embedder)
    target_node = EntityNode(
        name='test_entity_2',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await target_node.generate_name_embedding(mock_embedder)

    # Temporal window: valid days 2..4, expires day 6.
    now = datetime.now()
    edge = EntityEdge(
        source_node_uuid=source_node.uuid,
        target_node_uuid=target_node.uuid,
        name='RELATES_TO',
        fact='test_entity_1 relates to test_entity_2',
        created_at=now,
        valid_at=now + timedelta(days=2),
        invalid_at=now + timedelta(days=4),
        expired_at=now + timedelta(days=6),
        group_id=group_id,
    )
    await edge.generate_embedding(mock_embedder)

    for saveable in (source_node, target_node, edge):
        await saveable.save(graph_driver)

    # Every date clause below is satisfied by the edge created above.
    search_filters = SearchFilters(
        node_labels=['Entity'],
        edge_types=['RELATES_TO'],
        created_at=[[DateFilter(date=now, comparison_operator=ComparisonOperator.equals)]],
        expired_at=[[DateFilter(date=now, comparison_operator=ComparisonOperator.not_equals)]],
        valid_at=[
            [
                DateFilter(
                    date=now + timedelta(days=1),
                    comparison_operator=ComparisonOperator.greater_than_equal,
                )
            ],
            [
                DateFilter(
                    date=now + timedelta(days=3),
                    comparison_operator=ComparisonOperator.less_than_equal,
                )
            ],
        ],
        invalid_at=[
            [
                DateFilter(
                    date=now + timedelta(days=3),
                    comparison_operator=ComparisonOperator.greater_than,
                )
            ],
            [
                DateFilter(
                    date=now + timedelta(days=5),
                    comparison_operator=ComparisonOperator.less_than,
                )
            ],
        ],
    )
    edges = await edge_fulltext_search(
        graph_driver, 'test_entity_1 relates to test_entity_2', search_filters, group_ids=[group_id]
    )
    assert len(edges) == 1
    assert edges[0].name == edge.name
1016 |
1017 |
@pytest.mark.asyncio
async def test_edge_similarity_search(graph_driver, mock_embedder):
    """Similarity search by fact embedding returns the saved edge under all filters."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as tests fail on Falkordb')

    # Two endpoints joined by a single RELATES_TO edge.
    source_node = EntityNode(
        name='test_entity_1',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await source_node.generate_name_embedding(mock_embedder)
    target_node = EntityNode(
        name='test_entity_2',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    await target_node.generate_name_embedding(mock_embedder)

    # Temporal window: valid days 2..4, expires day 6.
    now = datetime.now()
    edge = EntityEdge(
        source_node_uuid=source_node.uuid,
        target_node_uuid=target_node.uuid,
        name='RELATES_TO',
        fact='test_entity_1 relates to test_entity_2',
        created_at=now,
        valid_at=now + timedelta(days=2),
        invalid_at=now + timedelta(days=4),
        expired_at=now + timedelta(days=6),
        group_id=group_id,
    )
    await edge.generate_embedding(mock_embedder)

    for saveable in (source_node, target_node, edge):
        await saveable.save(graph_driver)

    # Every date clause below is satisfied by the edge created above.
    search_filters = SearchFilters(
        node_labels=['Entity'],
        edge_types=['RELATES_TO'],
        created_at=[[DateFilter(date=now, comparison_operator=ComparisonOperator.equals)]],
        expired_at=[[DateFilter(date=now, comparison_operator=ComparisonOperator.not_equals)]],
        valid_at=[
            [
                DateFilter(
                    date=now + timedelta(days=1),
                    comparison_operator=ComparisonOperator.greater_than_equal,
                )
            ],
            [
                DateFilter(
                    date=now + timedelta(days=3),
                    comparison_operator=ComparisonOperator.less_than_equal,
                )
            ],
        ],
        invalid_at=[
            [
                DateFilter(
                    date=now + timedelta(days=3),
                    comparison_operator=ComparisonOperator.greater_than,
                )
            ],
            [
                DateFilter(
                    date=now + timedelta(days=5),
                    comparison_operator=ComparisonOperator.less_than,
                )
            ],
        ],
    )
    edges = await edge_similarity_search(
        graph_driver,
        edge.fact_embedding,
        source_node.uuid,
        target_node.uuid,
        search_filters,
        group_ids=[group_id],
    )
    assert len(edges) == 1
    assert edges[0].name == edge.name
1112 |
1113 |
@pytest.mark.asyncio
async def test_edge_bfs_search(graph_driver, mock_embedder):
    """Edge BFS surfaces facts reachable within the requested depth."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as tests fail on Falkordb')

    now = datetime.now()

    # Chain: episode -> entity_1 -RELATES_TO-> entity_2 -RELATES_TO-> entity_3
    episode = EpisodicNode(
        name='test_episodic_1',
        labels=[],
        created_at=now,
        group_id=group_id,
        source=EpisodeType.message,
        source_description='test_source_description',
        content='test_content',
        valid_at=now,
    )

    entities = []
    for entity_name in ('test_entity_1', 'test_entity_2', 'test_entity_3'):
        entity = EntityNode(
            name=entity_name,
            labels=[],
            created_at=now,
            group_id=group_id,
        )
        await entity.generate_name_embedding(mock_embedder)
        entities.append(entity)

    # One RELATES_TO edge between each consecutive pair of entities.
    relation_edges = []
    for left, right in zip(entities, entities[1:]):
        edge = EntityEdge(
            source_node_uuid=left.uuid,
            target_node_uuid=right.uuid,
            name='RELATES_TO',
            fact=f'{left.name} relates to {right.name}',
            created_at=now,
            valid_at=now + timedelta(days=2),
            invalid_at=now + timedelta(days=4),
            expired_at=now + timedelta(days=6),
            group_id=group_id,
        )
        await edge.generate_embedding(mock_embedder)
        relation_edges.append(edge)

    episode_edge = EpisodicEdge(
        source_node_uuid=episode.uuid,
        target_node_uuid=entities[0].uuid,
        created_at=now,
        group_id=group_id,
    )

    for saveable in (episode, *entities, *relation_edges, episode_edge):
        await saveable.save(graph_driver)

    # Every date clause below is satisfied by the edges created above.
    search_filters = SearchFilters(
        node_labels=['Entity'],
        edge_types=['RELATES_TO'],
        created_at=[[DateFilter(date=now, comparison_operator=ComparisonOperator.equals)]],
        expired_at=[[DateFilter(date=now, comparison_operator=ComparisonOperator.not_equals)]],
        valid_at=[
            [
                DateFilter(
                    date=now + timedelta(days=1),
                    comparison_operator=ComparisonOperator.greater_than_equal,
                )
            ],
            [
                DateFilter(
                    date=now + timedelta(days=3),
                    comparison_operator=ComparisonOperator.less_than_equal,
                )
            ],
        ],
        invalid_at=[
            [
                DateFilter(
                    date=now + timedelta(days=3),
                    comparison_operator=ComparisonOperator.greater_than,
                )
            ],
            [
                DateFilter(
                    date=now + timedelta(days=5),
                    comparison_operator=ComparisonOperator.less_than,
                )
            ],
        ],
    )

    async def reachable_facts(origin_uuid, depth):
        # Deduplicate results by uuid, then compare the set of facts.
        found = await edge_bfs_search(
            graph_driver,
            [origin_uuid],
            depth,
            search_filters,
            group_ids=[group_id],
        )
        return set({edge.uuid: edge.fact for edge in found}.values())

    fact_1_2 = 'test_entity_1 relates to test_entity_2'
    fact_2_3 = 'test_entity_2 relates to test_entity_3'

    # From the episode, entity edges only appear once the depth covers them.
    assert await reachable_facts(episode.uuid, 1) == set()
    assert await reachable_facts(episode.uuid, 2) == {fact_1_2}
    assert await reachable_facts(episode.uuid, 3) == {fact_1_2, fact_2_3}

    # From the first entity the same frontier is one hop closer.
    assert await reachable_facts(entities[0].uuid, 1) == {fact_1_2}
    assert await reachable_facts(entities[0].uuid, 2) == {fact_1_2, fact_2_3}
1304 |
1305 |
@pytest.mark.asyncio
async def test_node_fulltext_search(
    graph_driver, mock_embedder, mock_llm_client, mock_cross_encoder_client
):
    """Fulltext node search matches against entity summary text."""
    if graph_driver.provider == GraphProvider.KUZU:
        pytest.skip('Skipping as fulltext indexing not supported for Kuzu')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )
    await graphiti.build_indices_and_constraints()

    # Only the first node's summary mentions Alice.
    alice = EntityNode(
        name='test_entity_1',
        summary='Summary about Alice',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    bob = EntityNode(
        name='test_entity_2',
        summary='Summary about Bob',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    for node in (alice, bob):
        await node.generate_name_embedding(mock_embedder)
        await node.save(graph_driver)

    nodes = await node_fulltext_search(
        graph_driver,
        'Alice',
        SearchFilters(node_labels=['Entity']),
        group_ids=[group_id],
    )
    assert len(nodes) == 1
    assert nodes[0].name == alice.name
1353 |
1354 |
@pytest.mark.asyncio
async def test_node_similarity_search(graph_driver, mock_embedder):
    """With a high min_score, only the node whose embedding was queried matches."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as tests fail on Falkordb')

    alice = EntityNode(
        name='test_entity_alice',
        summary='Summary about Alice',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    bob = EntityNode(
        name='test_entity_bob',
        summary='Summary about Bob',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    for node in (alice, bob):
        await node.generate_name_embedding(mock_embedder)
        await node.save(graph_driver)

    # Query with Alice's own name embedding; Bob should fall under the threshold.
    nodes = await node_similarity_search(
        graph_driver,
        alice.name_embedding,
        SearchFilters(node_labels=['Entity']),
        group_ids=[group_id],
        min_score=0.9,
    )
    assert len(nodes) == 1
    assert nodes[0].name == alice.name
1393 |
1394 |
@pytest.mark.asyncio
async def test_node_bfs_search(graph_driver, mock_embedder):
    """Node BFS returns entities reachable within the requested depth."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as tests fail on Falkordb')

    now = datetime.now()

    # Chain: episode -> entity_1 -RELATES_TO-> entity_2 -RELATES_TO-> entity_3
    episode = EpisodicNode(
        name='test_episodic_1',
        labels=[],
        created_at=now,
        group_id=group_id,
        source=EpisodeType.message,
        source_description='test_source_description',
        content='test_content',
        valid_at=now,
    )

    entities = []
    for entity_name in ('test_entity_1', 'test_entity_2', 'test_entity_3'):
        entity = EntityNode(
            name=entity_name,
            labels=[],
            created_at=now,
            group_id=group_id,
        )
        await entity.generate_name_embedding(mock_embedder)
        entities.append(entity)

    relation_edges = []
    for left, right in zip(entities, entities[1:]):
        edge = EntityEdge(
            source_node_uuid=left.uuid,
            target_node_uuid=right.uuid,
            name='RELATES_TO',
            fact=f'{left.name} relates to {right.name}',
            created_at=now,
            group_id=group_id,
        )
        await edge.generate_embedding(mock_embedder)
        relation_edges.append(edge)

    episode_edge = EpisodicEdge(
        source_node_uuid=episode.uuid,
        target_node_uuid=entities[0].uuid,
        created_at=now,
        group_id=group_id,
    )

    for saveable in (episode, *entities, *relation_edges, episode_edge):
        await saveable.save(graph_driver)

    search_filters = SearchFilters(
        node_labels=['Entity'],
    )

    async def reachable_names(origin_uuid, depth):
        # Deduplicate results by uuid before comparing names.
        found = await node_bfs_search(
            graph_driver,
            [origin_uuid],
            search_filters,
            depth,
            group_ids=[group_id],
        )
        return set({node.uuid: node.name for node in found}.values())

    # From the episode, each extra hop reveals one more entity.
    assert await reachable_names(episode.uuid, 1) == {'test_entity_1'}
    assert await reachable_names(episode.uuid, 2) == {'test_entity_1', 'test_entity_2'}

    # From entity 1, a single hop reaches only entity 2.
    assert await reachable_names(entities[0].uuid, 1) == {'test_entity_2'}
1513 |
1514 |
@pytest.mark.asyncio
async def test_episode_fulltext_search(
    graph_driver, mock_embedder, mock_llm_client, mock_cross_encoder_client
):
    """Fulltext episode search matches the episode whose text mentions the query."""
    if graph_driver.provider == GraphProvider.KUZU:
        pytest.skip('Skipping as fulltext indexing not supported for Kuzu')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )
    await graphiti.build_indices_and_constraints()

    now = datetime.now()
    # Only the first episode's source description mentions Alice.
    alice_episode = EpisodicNode(
        name='test_episodic_1',
        content='test_content',
        created_at=now,
        valid_at=now,
        group_id=group_id,
        source=EpisodeType.message,
        source_description='Description about Alice',
    )
    bob_episode = EpisodicNode(
        name='test_episodic_2',
        content='test_content_2',
        created_at=now,
        valid_at=now,
        group_id=group_id,
        source=EpisodeType.message,
        source_description='Description about Bob',
    )
    for episode in (alice_episode, bob_episode):
        await episode.save(graph_driver)

    nodes = await episode_fulltext_search(
        graph_driver,
        'Alice',
        SearchFilters(node_labels=['Episodic']),
        group_ids=[group_id],
    )
    assert len(nodes) == 1
    assert nodes[0].name == alice_episode.name
1564 |
1565 |
@pytest.mark.asyncio
async def test_community_fulltext_search(
    graph_driver, mock_embedder, mock_llm_client, mock_cross_encoder_client
):
    """Fulltext community search finds a community by its name."""
    if graph_driver.provider == GraphProvider.KUZU:
        pytest.skip('Skipping as fulltext indexing not supported for Kuzu')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )
    await graphiti.build_indices_and_constraints()

    alice_community = CommunityNode(
        name='Alice',
        created_at=datetime.now(),
        group_id=group_id,
    )
    bob_community = CommunityNode(
        name='Bob',
        created_at=datetime.now(),
        group_id=group_id,
    )
    for community in (alice_community, bob_community):
        await community.generate_name_embedding(mock_embedder)
        await community.save(graph_driver)

    nodes = await community_fulltext_search(
        graph_driver,
        'Alice',
        group_ids=[group_id],
    )
    assert len(nodes) == 1
    assert nodes[0].name == alice_community.name
1607 |
1608 |
@pytest.mark.asyncio
async def test_community_similarity_search(
    graph_driver, mock_embedder, mock_llm_client, mock_cross_encoder_client
):
    """With a high min_score, only the community whose embedding was queried matches."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as tests fail on Falkordb')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )
    await graphiti.build_indices_and_constraints()

    alice_community = CommunityNode(
        name='Alice',
        created_at=datetime.now(),
        group_id=group_id,
    )
    bob_community = CommunityNode(
        name='Bob',
        created_at=datetime.now(),
        group_id=group_id,
    )
    for community in (alice_community, bob_community):
        await community.generate_name_embedding(mock_embedder)
        await community.save(graph_driver)

    # Query with Alice's own name embedding; Bob should fall under the threshold.
    nodes = await community_similarity_search(
        graph_driver,
        alice_community.name_embedding,
        group_ids=[group_id],
        min_score=0.9,
    )
    assert len(nodes) == 1
    assert nodes[0].name == alice_community.name
1651 |
1652 |
@pytest.mark.asyncio
async def test_get_relevant_nodes(
    graph_driver, mock_embedder, mock_llm_client, mock_cross_encoder_client
):
    """Nodes with names close to the query node are returned as relevant."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as tests fail on Falkordb')

    if graph_driver.provider == GraphProvider.KUZU:
        pytest.skip('Skipping as tests fail on Kuzu')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )
    await graphiti.build_indices_and_constraints()

    # 'Alice' and 'Alice Smith' are expected to match; 'Bob' is not.
    alice = EntityNode(
        name='Alice',
        summary='Alice',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    bob = EntityNode(
        name='Bob',
        summary='Bob',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    alice_smith = EntityNode(
        name='Alice Smith',
        summary='Alice Smith',
        labels=[],
        created_at=datetime.now(),
        group_id=group_id,
    )
    for node in (alice, bob, alice_smith):
        await node.generate_name_embedding(mock_embedder)
        await node.save(graph_driver)

    # get_relevant_nodes returns one result list per query node; take the first.
    relevant = (
        await get_relevant_nodes(
            graph_driver,
            [alice],
            SearchFilters(node_labels=['Entity']),
            min_score=0.9,
        )
    )[0]
    assert len(relevant) == 2
    assert {node.name for node in relevant} == {alice.name, alice_smith.name}
1714 |
1715 |
@pytest.mark.asyncio
async def test_get_relevant_edges_and_invalidation_candidates(
    graph_driver, mock_embedder, mock_llm_client, mock_cross_encoder_client
):
    """Relevant edges match the query edge closely; invalidation candidates cast a wider net."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as tests fail on Falkordb')

    graphiti = Graphiti(
        graph_driver=graph_driver,
        llm_client=mock_llm_client,
        embedder=mock_embedder,
        cross_encoder=mock_cross_encoder_client,
    )
    await graphiti.build_indices_and_constraints()

    now = datetime.now()

    entities = []
    for entity_name in ('test_entity_1', 'test_entity_2', 'test_entity_3'):
        entity = EntityNode(
            name=entity_name,
            summary=entity_name,
            labels=[],
            created_at=now,
            group_id=group_id,
        )
        await entity.generate_name_embedding(mock_embedder)
        entities.append(entity)

    # Two 'Alice' edges sharing entity 1 and one 'Bob' edge between 2 and 3.
    edge_specs = [
        (entities[0], entities[1], 'Alice'),
        (entities[1], entities[2], 'Bob'),
        (entities[0], entities[2], 'Alice'),
    ]
    edges = []
    for source, target, fact in edge_specs:
        edge = EntityEdge(
            source_node_uuid=source.uuid,
            target_node_uuid=target.uuid,
            name='RELATES_TO',
            fact=fact,
            created_at=now,
            expired_at=now + timedelta(days=6),
            valid_at=now + timedelta(days=2),
            invalid_at=now + timedelta(days=4),
            group_id=group_id,
        )
        await edge.generate_embedding(mock_embedder)
        edges.append(edge)

    for saveable in entities + edges:
        await saveable.save(graph_driver)

    # Every date clause below is satisfied by the edges created above.
    search_filters = SearchFilters(
        node_labels=['Entity'],
        edge_types=['RELATES_TO'],
        created_at=[[DateFilter(date=now, comparison_operator=ComparisonOperator.equals)]],
        expired_at=[[DateFilter(date=now, comparison_operator=ComparisonOperator.not_equals)]],
        valid_at=[
            [
                DateFilter(
                    date=now + timedelta(days=1),
                    comparison_operator=ComparisonOperator.greater_than_equal,
                )
            ],
            [
                DateFilter(
                    date=now + timedelta(days=3),
                    comparison_operator=ComparisonOperator.less_than_equal,
                )
            ],
        ],
        invalid_at=[
            [
                DateFilter(
                    date=now + timedelta(days=3),
                    comparison_operator=ComparisonOperator.greater_than,
                )
            ],
            [
                DateFilter(
                    date=now + timedelta(days=5),
                    comparison_operator=ComparisonOperator.less_than,
                )
            ],
        ],
    )

    query_edge = edges[0]

    # Only the query edge itself is similar enough to count as relevant.
    relevant = (
        await get_relevant_edges(
            graph_driver,
            [query_edge],
            search_filters,
            min_score=0.9,
        )
    )[0]
    assert len(relevant) == 1
    assert {edge.name for edge in relevant} == {query_edge.name}

    # Two edges qualify as invalidation candidates for the query edge.
    candidates = (
        await get_edge_invalidation_candidates(
            graph_driver,
            [query_edge],
            search_filters,
            min_score=0.9,
        )
    )[0]
    assert len(candidates) == 2
    assert {edge.name for edge in candidates} == {query_edge.name, edges[2].name}
1868 |
1869 |
@pytest.mark.asyncio
async def test_node_distance_reranker(graph_driver, mock_embedder):
    """A node connected to the center node ranks ahead of a disconnected one."""
    if graph_driver.provider == GraphProvider.FALKORDB:
        pytest.skip('Skipping as tests fail on Falkordb')

    now = datetime.now()

    center = EntityNode(
        name='test_entity_1',
        labels=[],
        created_at=now,
        group_id=group_id,
    )
    neighbor = EntityNode(
        name='test_entity_2',
        labels=[],
        created_at=now,
        group_id=group_id,
    )
    stranger = EntityNode(
        name='test_entity_3',
        labels=[],
        created_at=now,
        group_id=group_id,
    )
    for node in (center, neighbor, stranger):
        await node.generate_name_embedding(mock_embedder)

    # Only the neighbor is connected to the center node.
    link = EntityEdge(
        source_node_uuid=center.uuid,
        target_node_uuid=neighbor.uuid,
        name='RELATES_TO',
        fact='test_entity_1 relates to test_entity_2',
        created_at=now,
        group_id=group_id,
    )
    await link.generate_embedding(mock_embedder)

    for saveable in (center, neighbor, stranger, link):
        await saveable.save(graph_driver)

    reranked_uuids, reranked_scores = await node_distance_reranker(
        graph_driver,
        [neighbor.uuid, stranger.uuid],
        center.uuid,
    )
    names_by_uuid = {node.uuid: node.name for node in (center, neighbor, stranger)}
    assert [names_by_uuid[uuid] for uuid in reranked_uuids] == [neighbor.name, stranger.name]
    assert np.allclose(reranked_scores, [1.0, 0.0])
1929 |
1930 |
1931 | @pytest.mark.asyncio
1932 | async def test_episode_mentions_reranker(graph_driver, mock_embedder):  # reranks entities by episode mention count
1933 |     if graph_driver.provider == GraphProvider.FALKORDB:
1934 |         pytest.skip('Skipping as tests fail on Falkordb')  # known-failing backend; skip rather than fail
1935 |
1936 |     # Create episodic nodes
1937 |     episodic_node_1 = EpisodicNode(
1938 |         name='test_episodic_1',
1939 |         content='test_content',
1940 |         created_at=datetime.now(),
1941 |         valid_at=datetime.now(),
1942 |         group_id=group_id,
1943 |         source=EpisodeType.message,
1944 |         source_description='Description about Alice',
1945 |     )
1946 |
1947 |     # Create entity nodes
1948 |     entity_node_1 = EntityNode(
1949 |         name='test_entity_1',
1950 |         labels=[],
1951 |         created_at=datetime.now(),
1952 |         group_id=group_id,
1953 |     )
1954 |     await entity_node_1.generate_name_embedding(mock_embedder)
1955 |     entity_node_2 = EntityNode(
1956 |         name='test_entity_2',
1957 |         labels=[],
1958 |         created_at=datetime.now(),
1959 |         group_id=group_id,
1960 |     )
1961 |     await entity_node_2.generate_name_embedding(mock_embedder)
1962 |
1963 |     # Create episodic edge so the episode mentions entity_1 only (entity_2 is never mentioned)
1964 |     episodic_edge_1 = EpisodicEdge(
1965 |         source_node_uuid=episodic_node_1.uuid,
1966 |         target_node_uuid=entity_node_1.uuid,
1967 |         created_at=datetime.now(),
1968 |         group_id=group_id,
1969 |     )
1970 |
1971 |     # Save the graph
1972 |     await entity_node_1.save(graph_driver)
1973 |     await entity_node_2.save(graph_driver)
1974 |     await episodic_node_1.save(graph_driver)
1975 |     await episodic_edge_1.save(graph_driver)
1976 |
1977 |     # Test reranker (candidates are passed as a list of result groups)
1978 |     reranked_uuids, reranked_scores = await episode_mentions_reranker(
1979 |         graph_driver,
1980 |         [[entity_node_1.uuid, entity_node_2.uuid]],
1981 |     )
1982 |     uuid_to_name = {entity_node_1.uuid: entity_node_1.name, entity_node_2.uuid: entity_node_2.name}
1983 |     names = [uuid_to_name[uuid] for uuid in reranked_uuids]
1984 |     assert names == [entity_node_1.name, entity_node_2.name]  # mentioned entity ranks first
1985 |     assert np.allclose(reranked_scores, [1.0, float('inf')])  # entity_1 has one mention (1.0); entity_2 has none (inf sentinel)
1986 |
1987 |
1988 | @pytest.mark.asyncio
1989 | async def test_get_embeddings_for_edges(graph_driver, mock_embedder):  # round-trips a saved fact embedding, keyed by edge uuid
1990 |     # Create entity nodes
1991 |     entity_node_1 = EntityNode(
1992 |         name='test_entity_1',
1993 |         labels=[],
1994 |         created_at=datetime.now(),
1995 |         group_id=group_id,
1996 |     )
1997 |     await entity_node_1.generate_name_embedding(mock_embedder)
1998 |     entity_node_2 = EntityNode(
1999 |         name='test_entity_2',
2000 |         labels=[],
2001 |         created_at=datetime.now(),
2002 |         group_id=group_id,
2003 |     )
2004 |     await entity_node_2.generate_name_embedding(mock_embedder)
2005 |
2006 |     # Create entity edges
2007 |     entity_edge_1 = EntityEdge(
2008 |         source_node_uuid=entity_node_1.uuid,
2009 |         target_node_uuid=entity_node_2.uuid,
2010 |         name='RELATES_TO',
2011 |         fact='test_entity_1 relates to test_entity_2',
2012 |         created_at=datetime.now(),
2013 |         group_id=group_id,
2014 |     )
2015 |     await entity_edge_1.generate_embedding(mock_embedder)
2016 |
2017 |     # Save the graph
2018 |     await entity_node_1.save(graph_driver)
2019 |     await entity_node_2.save(graph_driver)
2020 |     await entity_edge_1.save(graph_driver)
2021 |
2022 |     # Get embeddings for edges; result maps edge uuid -> fact embedding
2023 |     embeddings = await get_embeddings_for_edges(graph_driver, [entity_edge_1])
2024 |     assert len(embeddings) == 1
2025 |     assert entity_edge_1.uuid in embeddings
2026 |     assert np.allclose(embeddings[entity_edge_1.uuid], entity_edge_1.fact_embedding)  # stored embedding matches the generated one
2027 |
2028 |
2029 | @pytest.mark.asyncio
2030 | async def test_get_embeddings_for_nodes(graph_driver, mock_embedder):  # round-trips a saved name embedding, keyed by node uuid
2031 |     # Create entity nodes
2032 |     entity_node_1 = EntityNode(
2033 |         name='test_entity_1',
2034 |         labels=[],
2035 |         created_at=datetime.now(),
2036 |         group_id=group_id,
2037 |     )
2038 |     await entity_node_1.generate_name_embedding(mock_embedder)
2039 |
2040 |     # Save the graph
2041 |     await entity_node_1.save(graph_driver)
2042 |
2043 |     # Get embeddings for nodes; result maps node uuid -> name embedding
2044 |     embeddings = await get_embeddings_for_nodes(graph_driver, [entity_node_1])
2045 |     assert len(embeddings) == 1
2046 |     assert entity_node_1.uuid in embeddings
2047 |     assert np.allclose(embeddings[entity_node_1.uuid], entity_node_1.name_embedding)  # stored embedding matches the generated one
2048 |
2049 |
2050 | @pytest.mark.asyncio
2051 | async def test_get_embeddings_for_communities(graph_driver, mock_embedder):  # round-trips a saved community name embedding, keyed by uuid
2052 |     # Create community nodes
2053 |     community_node_1 = CommunityNode(
2054 |         name='test_community_1',
2055 |         labels=[],
2056 |         created_at=datetime.now(),
2057 |         group_id=group_id,
2058 |     )
2059 |     await community_node_1.generate_name_embedding(mock_embedder)
2060 |
2061 |     # Save the graph
2062 |     await community_node_1.save(graph_driver)
2063 |
2064 |     # Get embeddings for communities; result maps community uuid -> name embedding
2065 |     embeddings = await get_embeddings_for_communities(graph_driver, [community_node_1])
2066 |     assert len(embeddings) == 1
2067 |     assert community_node_1.uuid in embeddings
2068 |     assert np.allclose(embeddings[community_node_1.uuid], community_node_1.name_embedding)  # stored embedding matches the generated one
2069 |
```