Alibaba Cloud MySQL
Alibaba Cloud MySQL is also known as ApsaraDB RDS for MySQL. ApsaraDB RDS for MySQL is a high-performance online database service based on a branch of the MySQL source code. ApsaraDB RDS for MySQL is a proven solution that has handled large volumes of concurrent traffic during Double 11. ApsaraDB RDS for MySQL provides basic features such as whitelist configuration, backup and recovery, Transparent Data Encryption (TDE), data migration, and management of instances, accounts, and databases. For more information, see RDS MySQL Feature Overview.
To run this notebook, you need an ApsaraDB RDS MySQL instance running in the cloud, an account on that instance, and the required databases. You can refer to this link.
For this notebook, create two databases named llama_index_test and llama_index_meta_test in your ApsaraDB RDS MySQL instance.
If you’re opening this notebook on Colab, you will probably need to ensure you have llama-index installed:
!pip install llama-index
%pip install llama-index-vector-stores-alibabacloud-mysql
# choose dashscope as embedding and llm model, you can also use default openai or other model to test
%pip install llama-index-embeddings-dashscope
%pip install llama-index-llms-dashscope
Configure the DashScope embedding and LLM models; you can also use the default OpenAI models or other models to test. If you choose to use the DashScope models, you can get your API key here, and set it in the following code:
!export DASHSCOPE_API_KEY="your_api_key"Download example data
Section titled “Download example data”!mkdir -p 'data/paul_graham/'!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'RAG Demo using Alibaba Cloud MySQL
Section titled “RAG Demo using Alibaba Cloud MySQL”Simple Query
Section titled “Simple Query”Load Data for Simple Query
Section titled “Load Data for Simple Query”import os
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY")
from llama_index.core import ( VectorStoreIndex, SimpleDirectoryReader, StorageContext,)from llama_index.vector_stores.alibabacloud_mysql.base import ( AlibabaCloudMySQLVectorStore,)
# set Embedding model
from llama_index.core import Settings
from llama_index.embeddings.dashscope import DashScopeEmbedding
Settings.embed_model = DashScopeEmbedding(api_key=DASHSCOPE_API_KEY)
# config llm modelfrom llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
dashscope_llm = DashScope( model_name=DashScopeGenerationModels.QWEN_MAX, api_key=DASHSCOPE_API_KEY)
documents = SimpleDirectoryReader("data/paul_graham/").load_data()print(f"Total documents: {len(documents)}")print(f"First document, id: {documents[0].doc_id}")print(f"First document, hash: {documents[0].hash}")print( "First document, text" f" ({len(documents[0].text)} characters):\n{'='*20}\n{documents[0].text[:360]} ...")
print( """################## simple generate vector#################""")client = AlibabaCloudMySQLVectorStore.from_params( host="rm-***.mysql.***.rds.aliyuncs.com", port=3306, user="user", password="password", database="llama_index_test", distance_method="COSINE",)storage_context = StorageContext.from_defaults(vector_store=client)VectorStoreIndex.from_documents( documents, storage_context=storage_context, show_progress=True)Query using AlibabaCloudMySQL with Search Test
Section titled “Query using AlibabaCloudMySQL with Search Test”import os
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY")
from llama_index.core import VectorStoreIndexfrom llama_index.vector_stores.alibabacloud_mysql.base import ( AlibabaCloudMySQLVectorStore,)
# set Embedding model
from llama_index.core import Settings
from llama_index.embeddings.dashscope import DashScopeEmbedding
embed_model = DashScopeEmbedding(api_key=DASHSCOPE_API_KEY)# Global SettingsSettings.embed_model = embed_model
# config llm modelfrom llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
dashscope_llm = DashScope( model_name=DashScopeGenerationModels.QWEN_MAX, api_key=DASHSCOPE_API_KEY)
print( """################## Basic Querying including Search Test#################""")client = AlibabaCloudMySQLVectorStore.from_params( host="rm-***.mysql.eu-west-1.rds.***.com", port=3306, user="user", password="password", database="llama_index_test", distance_method="COSINE",)index = VectorStoreIndex.from_vector_store( vector_store=client, embed_model=embed_model)
QUESTION = "What did the author do growing up?"# Set Retrievervector_retriever = index.as_retriever()# searchsource_nodes = vector_retriever.retrieve(QUESTION)# check source_nodesprint(f"Question: {QUESTION}")for node in source_nodes: print(f"---------------------------------------------") print("Search Test") print(f"---------------------------------------------") print(f"Score: {node.score:.3f}") print(node.get_content()) print(f"---------------------------------------------")
# run queryquery_engine = index.as_query_engine(llm=dashscope_llm)res = query_engine.query(QUESTION)print(f"Answer: {res.response}")print(f"---------------------------------------------\n\n")Metadata Filtering
Section titled “Metadata Filtering”Load Data for Metadata Filtering
Section titled “Load Data for Metadata Filtering”import os
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY")
from llama_index.core import ( VectorStoreIndex, SimpleDirectoryReader, StorageContext,)from llama_index.vector_stores.alibabacloud_mysql.base import ( AlibabaCloudMySQLVectorStore,)
# set Embedding model
from llama_index.core import Settings
from llama_index.embeddings.dashscope import DashScopeEmbedding
Settings.embed_model = DashScopeEmbedding(api_key=DASHSCOPE_API_KEY)
# config llm modelfrom llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
dashscope_llm = DashScope( model_name=DashScopeGenerationModels.QWEN_MAX, api_key=DASHSCOPE_API_KEY)
documents = SimpleDirectoryReader("data/paul_graham/").load_data()print(f"Total documents: {len(documents)}")print(f"First document, id: {documents[0].doc_id}")print(f"First document, hash: {documents[0].hash}")print( "First document, text" f" ({len(documents[0].text)} characters):\n{'='*20}\n{documents[0].text[:360]} ...")
print( """################## generate vector with some metadata for Metadata Filtering#################""")client = AlibabaCloudMySQLVectorStore.from_params( host="rm-***.mysql.***.rds.aliyuncs.com", port=3306, user="user", password="password", database="llama_index_meta_test", distance_method="COSINE",)storage_context = StorageContext.from_defaults(vector_store=client)index = VectorStoreIndex.from_documents( documents, storage_context=storage_context, show_progress=True)
from llama_index.core import Documentimport regex as re
# Split the text into paragraphs.text_chunks = documents[0].text.split("\n\n")
# Create a document for each footnotefootnotes = [ Document( text=chunk, id=documents[0].doc_id, metadata={ "is_footnote": bool(re.search(r"^\s*\[\d+\]\s*", chunk)), "mark_id": i, }, ) for i, chunk in enumerate(text_chunks) if bool(re.search(r"^\s*\[\d+\]\s*", chunk))]
# Insert the footnotes into the indexfor f in footnotes: index.insert(f)Query using AlibabaCloudMySQL with Search Test
Section titled “Query using AlibabaCloudMySQL with Search Test”import os
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY")
from llama_index.core import VectorStoreIndexfrom llama_index.vector_stores.alibabacloud_mysql.base import ( AlibabaCloudMySQLVectorStore,)
# set Embedding model
from llama_index.core import Settings
from llama_index.embeddings.dashscope import DashScopeEmbedding
embed_model = DashScopeEmbedding(api_key=DASHSCOPE_API_KEY)# Global SettingsSettings.embed_model = embed_model
# config llm modelfrom llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
dashscope_llm = DashScope( model_name=DashScopeGenerationModels.QWEN_MAX, api_key=DASHSCOPE_API_KEY)
print( """################## Querying with Metadata Filtering including Search Test#################""")client = AlibabaCloudMySQLVectorStore.from_params( host="rm-***.mysql.***.rds.aliyuncs.com", port=3306, user="user", password="password", database="llama_index_meta_test", distance_method="COSINE",)index = VectorStoreIndex.from_vector_store( vector_store=client, embed_model=embed_model)
from llama_index.core.vector_stores import ( MetadataFilters, MetadataFilter, FilterOperator, FilterCondition,)
QUESTION = "What did the author about space aliens and lisp?"print(f"---------------------------------------------")print(f"Question: {QUESTION}")filters = MetadataFilters( filters=[ MetadataFilter( key="is_footnote", value="true", operator=FilterOperator.EQ ), MetadataFilter(key="mark_id", value=0, operator=FilterOperator.GTE), ], condition=FilterCondition.AND,)print(f"---------------------------------------------")for i in range(len(filters.filters)): print(f"Filter[{i}]: {filters.filters[i]}")print(f"Filter Condition: {filters.condition}")print(f"---------------------------------------------")retriever = index.as_retriever( filters=filters,)result = retriever.retrieve(QUESTION)for node in result: print("Search Test") print(f"---------------------------------------------") print(f"Score: {node.score:.3f}") print(node.get_content()) print(f"---------------------------------------------")
# Create a query engine that only searches certain footnotes.footnote_query_engine = index.as_query_engine( filters=filters, llm=dashscope_llm,)
res = footnote_query_engine.query(QUESTION)print(f"Answer: {res.response}")print(f"---------------------------------------------\n\n")