llama-parse is async-first; running the async code in a notebook requires the use of nest_asyncio.
import nest_asyncio
nest_asyncio.apply()
import os
API access to llama-cloud
Section titled “API access to llama-cloud”
os.environ["LLAMA_CLOUD_API_KEY"] = "llx-..."
Using OpenAI API for embeddings/llms
Section titled “Using OpenAI API for embeddings/llms”
os.environ["OPENAI_API_KEY"] = "sk-..."
#### Setting LLM and Embedding Model
```python
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings

embed_model = OpenAIEmbedding(model="text-embedding-3-small")
llm = OpenAI(model="gpt-3.5-turbo-0125")

Settings.llm = llm
Settings.embed_model = embed_model
```
LlamaParse PDF reader for PDF Parsing
Section titled “LlamaParse PDF reader for PDF Parsing”
We compare two different retrieval/query engine strategies.
- Using raw Markdown text as nodes for building index and applying a simple query engine for generating results.
- Using MarkdownElementNodeParser for parsing the LlamaParse output Markdown results and building a recursive retriever query engine for generation.
# LlamaParse PDF reader for PDF Parsing
from llama_parse import LlamaParse

documents = LlamaParse(result_type="markdown").load_data(
    "./uber_10q_march_2022.pdf"
)
# Started parsing the file under job_id b76a572b-d2bb-42ae-bad9-b9810049f1af
Started parsing the file under job_id 0ef2f65b-9cab-4ca8-b221-d20f1f6d1336
print(documents[0].text[:1000] + "...")
# UNITED STATES SECURITIES AND EXCHANGE COMMISSION
# Washington, D.C. 20549
# FORM 10-Q
(Mark One)
☒ QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934
For the quarterly period ended March 31, 2022
OR
☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934
For the transition period from_____ to _____
Commission File Number: 001-38902
# UBER TECHNOLOGIES, INC.
(Exact name of registrant as specified in its charter)
Not Applicable
(Former name, former address and former fiscal year, if changed since last report)
|Delaware|45-2647441|
|---|---|
|(State or other jurisdiction of incorporation or organization)|(I.R.S. Employer Identification No.)|
|1515 3rd Street|San Francisco, California 94158|
|(Address of principal executive offices, including zip code)|(415) 612-8582|
|(Registrant's telephone number, including area code)| |
# Securities registered pursuant to Section 12(b) of the Act:
|Title of each c...
from llama_index.core.node_parser import MarkdownElementNodeParser
node_parser = MarkdownElementNodeParser( llm=OpenAI(model="gpt-3.5-turbo-0125"), num_workers=8)
nodes = node_parser.get_nodes_from_documents(documents)
3it [00:00, 41803.69it/s]1it [00:00, 22310.13it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]1it [00:00, 20867.18it/s]1it [00:00, 22429.43it/s]1it [00:00, 21399.51it/s]1it [00:00, 20460.02it/s]1it [00:00, 19508.39it/s]1it [00:00, 19508.39it/s]5it [00:00, 85598.04it/s]0it [00:00, ?it/s]2it [00:00, 41527.76it/s]2it [00:00, 46091.25it/s]2it [00:00, 40524.68it/s]2it [00:00, 38836.15it/s]2it [00:00, 42366.71it/s]2it [00:00, 41943.04it/s]1it [00:00, 23967.45it/s]1it [00:00, 24818.37it/s]1it [00:00, 25890.77it/s]4it [00:00, 72628.64it/s]2it [00:00, 38836.15it/s]3it [00:00, 41943.04it/s]0it [00:00, ?it/s]3it [00:00, 58254.22it/s]3it [00:00, 53773.13it/s]1it [00:00, 25575.02it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]1it [00:00, 26051.58it/s]1it [00:00, 21509.25it/s]0it [00:00, ?it/s]1it [00:00, 16008.79it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]2it [00:00, 42153.81it/s]4it [00:00, 76260.07it/s]5it [00:00, 75166.74it/s]2it [00:00, 39383.14it/s]2it [00:00, 39756.44it/s]1it [00:00, 24244.53it/s]2it [00:00, 42153.81it/s]0it [00:00, ?it/s]1it [00:00, 23045.63it/s]1it [00:00, 23431.87it/s]1it [00:00, 24528.09it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]1it [00:00, 10810.06it/s]2it [00:00, 8473.34it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, 
?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]1it [00:00, 12633.45it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]0it [00:00, ?it/s]
text_nodes, index_nodes = node_parser.get_nodes_and_objects(nodes)
text_nodes[0]
TextNode(id_='c6ffea61-1221-40e3-b0e0-5b24cfbd02d5', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='33b7b29c-8eba-458b-a25f-bb8f88951e92', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='3d4ec5b02a042598b0ea47cdac56453869c17b531a10f60343e9598e05a9390e'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='de618b65-c78a-4390-8536-4e9e295c0e49', node_type=<ObjectType.INDEX: '3'>, metadata={'col_schema': 'Column: Delaware\nType: string\nSummary: State or other jurisdiction of incorporation or organization\n\nColumn: 45-2647441\nType: string\nSummary: I.R.S. Employer Identification No.'}, hash='c008153189b8dd031a3e5e694239a50ebd21f42602676f072d9746241fcef858')}, text='UNITED STATES SECURITIES AND EXCHANGE COMMISSION\n\n Washington, D.C. 20549\n\n FORM 10-Q\n\n(Mark One)\n\nâ QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\nFor the quarterly period ended March 31, 2022\n\nOR\n\nâ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\nFor the transition period from_____ to _____\n\nCommission File Number: 001-38902\n\n UBER TECHNOLOGIES, INC.\n\n(Exact name of registrant as specified in its charter)\n\nNot Applicable\n\n(Former name, former address and former fiscal year, if changed since last report)', mimetype='text/plain', start_char_idx=1, end_char_idx=595, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')
index_nodes[0]
IndexNode(id_='de618b65-c78a-4390-8536-4e9e295c0e49', embedding=None, metadata={'col_schema': 'Column: Delaware\nType: string\nSummary: State or other jurisdiction of incorporation or organization\n\nColumn: 45-2647441\nType: string\nSummary: I.R.S. Employer Identification No.'}, excluded_embed_metadata_keys=['col_schema'], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='33b7b29c-8eba-458b-a25f-bb8f88951e92', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='3d4ec5b02a042598b0ea47cdac56453869c17b531a10f60343e9598e05a9390e'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='c6ffea61-1221-40e3-b0e0-5b24cfbd02d5', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='0cafbb2bbffe3085738e748c9ed19c5b88f6b300d876820fc3caa7afa8f0627f'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='c57f8dab-7b69-4850-8885-6a9cf0f531f9', node_type=<ObjectType.TEXT: '1'>, metadata={'table_df': "{'Delaware': {0: '(State or other jurisdiction of incorporation or organization)', 1: '1515 3rd Street', 2: '(Address of principal executive offices, including zip code)', 3: '(Registrantâs telephone number, including area code)'}, '45-2647441': {0: '(I.R.S. Employer Identification No.)', 1: 'San Francisco, California 94158', 2: '(415) 612-8582', 3: ' '}}", 'table_summary': "Table providing information about a company's incorporation details, address of principal executive offices, and contact information.,\nwith the following columns:\n- Delaware: State or other jurisdiction of incorporation or organization\n- 45-2647441: I.R.S. Employer Identification No.\n"}, hash='fadc844962620525c1d3c8d7ff1693a090642818928f9ce7600117258a39aa04')}, text="Table providing information about a company's incorporation details, address of principal executive offices, and contact information.,\nwith the following columns:\n- Delaware: State or other jurisdiction of incorporation or organization\n- 45-2647441: I.R.S. 
Employer Identification No.\n", mimetype='text/plain', start_char_idx=601, end_char_idx=919, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n', index_id='c57f8dab-7b69-4850-8885-6a9cf0f531f9', obj=TextNode(id_='c57f8dab-7b69-4850-8885-6a9cf0f531f9', embedding=None, metadata={'table_df': "{'Delaware': {0: '(State or other jurisdiction of incorporation or organization)', 1: '1515 3rd Street', 2: '(Address of principal executive offices, including zip code)', 3: '(Registrantâs telephone number, including area code)'}, '45-2647441': {0: '(I.R.S. Employer Identification No.)', 1: 'San Francisco, California 94158', 2: '(415) 612-8582', 3: ' '}}", 'table_summary': "Table providing information about a company's incorporation details, address of principal executive offices, and contact information.,\nwith the following columns:\n- Delaware: State or other jurisdiction of incorporation or organization\n- 45-2647441: I.R.S. Employer Identification No.\n"}, excluded_embed_metadata_keys=['table_df', 'table_summary'], excluded_llm_metadata_keys=['table_df', 'table_summary'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='33b7b29c-8eba-458b-a25f-bb8f88951e92', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='3d4ec5b02a042598b0ea47cdac56453869c17b531a10f60343e9598e05a9390e'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='de618b65-c78a-4390-8536-4e9e295c0e49', node_type=<ObjectType.INDEX: '3'>, metadata={'col_schema': 'Column: Delaware\nType: string\nSummary: State or other jurisdiction of incorporation or organization\n\nColumn: 45-2647441\nType: string\nSummary: I.R.S. 
Employer Identification No.'}, hash='c008153189b8dd031a3e5e694239a50ebd21f42602676f072d9746241fcef858'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='c0fa90a9-fe14-46fa-8434-cd70e134d40e', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='cc6b1c09572e3a0bd06a93ccea32a5562b36a393a36ede3852f4a5fc946c51fd')}, text="Table providing information about a company's incorporation details, address of principal executive offices, and contact information.,\nwith the following columns:\n- Delaware: State or other jurisdiction of incorporation or organization\n- 45-2647441: I.R.S. Employer Identification No.\n\n|Delaware|45-2647441|\n|---|---|\n|(State or other jurisdiction of incorporation or organization)|(I.R.S. Employer Identification No.)|\n|1515 3rd Street|San Francisco, California 94158|\n|(Address of principal executive offices, including zip code)|(415) 612-8582|\n|(Registrantâs telephone number, including area code)| |\n", mimetype='text/plain', start_char_idx=601, end_char_idx=919, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'))
Build Index
Section titled “Build Index”
recursive_index = VectorStoreIndex(nodes=text_nodes + index_nodes)
raw_index = VectorStoreIndex.from_documents(documents)
Create Query Engines
Section titled “Create Query Engines”
from llama_index.postprocessor.flag_embedding_reranker import (
    FlagEmbeddingReranker,
)
reranker = FlagEmbeddingReranker( top_n=5, model="BAAI/bge-reranker-large",)
tokenizer_config.json: 0%| | 0.00/443 [00:00<?, ?B/s]
sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00<?, ?B/s]
tokenizer.json: 0%| | 0.00/17.1M [00:00<?, ?B/s]
special_tokens_map.json: 0%| | 0.00/279 [00:00<?, ?B/s]
config.json: 0%| | 0.00/801 [00:00<?, ?B/s]
model.safetensors: 0%| | 0.00/2.24G [00:00<?, ?B/s]
recursive_query_engine = recursive_index.as_query_engine( similarity_top_k=15, node_postprocessors=[reranker], verbose=True)
raw_query_engine = raw_index.as_query_engine( similarity_top_k=15, node_postprocessors=[reranker])
Querying with two different query engines
Section titled “Querying with two different query engines”
We compare the base query engine with the recursive query engine on tables.
Table Query Task: Queries for Table Question Answering
Section titled “Table Query Task: Queries for Table Question Answering”
query = "What is the change of free cash flow and what is the rate from the financial and operational highlights?"
response_1 = raw_query_engine.query(query)
print("\n************New LlamaParse+ Basic Query Engine************")
print(response_1)

response_2 = recursive_query_engine.query(query)
print(
    "\n************New LlamaParse+ Recursive Retriever Query Engine************"
)
print(response_2)
************New LlamaParse+ Basic Query Engine************The change in free cash flow from the financial and operational highlights is an increase of $826 million, from a net cash used in operating activities of $611 million in 2021 to net cash provided by operating activities of $215 million in 2022. The rate of this change is a positive improvement.[1;3;38;2;11;159;203mRetrieval entering 015f9778-1f7c-44cd-9e26-90f2c9e21550: TextNode[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?[0m[1;3;38;2;11;159;203mRetrieval entering 5e8febd0-0c43-4552-9499-9465674b8877: TextNode[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?[0m[1;3;38;2;11;159;203mRetrieval entering d3c8d59b-9d7e-4088-94e9-3e58aba09f10: TextNode[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?[0m[1;3;38;2;11;159;203mRetrieval entering 25385e8f-24df-4660-959b-c499cc220246: TextNode[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?[0m[1;3;38;2;11;159;203mRetrieval entering 6b48fe6f-a60e-425a-aba5-22b62d1b4512: TextNode[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?[0m[1;3;38;2;11;159;203mRetrieval entering aa60761d-1d8a-4896-aef9-e41553f17558: TextNode[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?[0m[1;3;38;2;11;159;203mRetrieval entering d0634d58-0589-47cb-9921-d2d57d88240f: 
TextNode[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?[0m[1;3;38;2;11;159;203mRetrieval entering 492b36c1-8b39-4db9-8dca-dd9f6c488a9d: TextNode[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What is the change of free cash flow and what is the rate from the financial and operational highlights?[0m************New LlamaParse+ Recursive Retriever Query Engine************The change in free cash flow from 2021 to 2022 is an increase of $635 million. This change represents a significant improvement in free cash flow performance over the period.