MongoDB Atlas Vector Store
If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.
%pip install llama-index-vector-stores-mongodb
!pip install llama-index
# Provide URI to constructor, or use environment variable
import os

import pymongo
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.core import SimpleDirectoryReader
Download Data
!mkdir -p 'data/10k/'!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
# Resolve the connection string. Prefer the MONGO_URI environment variable so
# credentials never have to be typed into the notebook; when it is unset, fall
# back to the placeholder below, which must be filled in before running.
mongo_uri = os.environ.get(
    "MONGO_URI",
    "mongodb+srv://<username>:<password>@<host>?retryWrites=true&w=majority",
)
mongodb_client = pymongo.MongoClient(mongo_uri)

# Wrap the client in the vector store and create its vector search index.
# NOTE(review): dimensions=1536 presumably matches the embedding model in use
# -- confirm if a non-default embedding model is configured.
store = MongoDBAtlasVectorSearch(mongodb_client)
store.create_vector_search_index(
    dimensions=1536, path="embedding", similarity="cosine"
)
storage_context = StorageContext.from_defaults(vector_store=store)

# Load the downloaded PDF and index it into the store.
uber_docs = SimpleDirectoryReader(
    input_files=["./data/10k/uber_2021.pdf"]
).load_data()
index = VectorStoreIndex.from_documents(
    uber_docs, storage_context=storage_context
)
# Ask a question against the index and render the answer in bold.
# NOTE(review): `Markdown` (and `display`) are not imported anywhere in the
# visible cells -- presumably they come from IPython.display; confirm they are
# in scope before this cell runs.
query_engine = index.as_query_engine()
response = query_engine.query("What was Uber's revenue?")
display(Markdown(f"<b>{response}</b>"))
from llama_index.core import Response
# Initial size: number of documents currently in the collection.
print(store._collection.count_documents({}))

# Get a ref_doc_id from the first source node of the response; anything that
# is not already a Response is converted with get_response() first.
if isinstance(response, Response):
    typed_response = response
else:
    typed_response = response.get_response()
ref_doc_id = typed_response.source_nodes[0].node.ref_doc_id
print(store._collection.count_documents({"metadata.ref_doc_id": ref_doc_id}))

# Test store delete: remove that document's entries and print the new size.
if ref_doc_id:
    store.delete(ref_doc_id)
    print(store._collection.count_documents({}))
4454
1
4453
Note: For MongoDB Atlas, you have to create an Atlas Search Index.