# LlamaCloud API & Clients Guide
This guide walks through the core workflow with the LlamaCloud API clients: set up a client, create an index (pipeline), add data to it, observe ingestion, and run retrieval.
## App setup

Install the API client package, then import and configure a client.

Python (sync):

```bash
pip install llama-cloud
```

```python
from llama_cloud.client import LlamaCloud

client = LlamaCloud(token='<llama-cloud-api-key>')
```

Python (async):

```bash
pip install llama-cloud
```

```python
from llama_cloud.client import AsyncLlamaCloud

async_client = AsyncLlamaCloud(token='<llama-cloud-api-key>')
```

TypeScript:

```bash
npm install llama-cloud-services
```

```typescript
import { LlamaCloudApiClient } from 'llama-cloud-services';

const client = new LlamaCloudApiClient({ token: apiKey });
```
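Rather than hardcoding the token, you can read it from the environment; a minimal Python sketch, assuming the key is exported as `LLAMA_CLOUD_API_KEY` (the variable name is illustrative):

```python
import os

from llama_cloud.client import LlamaCloud

# Assumption: the API key was exported as LLAMA_CLOUD_API_KEY.
client = LlamaCloud(token=os.environ["LLAMA_CLOUD_API_KEY"])
```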
## Create new index

### Upload files

Python (sync):

```python
with open('test.pdf', 'rb') as f:
    file = client.files.upload_file(upload_file=f)
```

Python (async):

```python
with open('test.pdf', 'rb') as f:
    file = await async_client.files.upload_file(upload_file=f)
```

TypeScript:

```typescript
import fs from "fs";

const filePath = "node_modules/llamaindex/examples/abramov.txt";
const file = await client.files.uploadFile(project.id, fs.createReadStream(filePath));
```
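If you have several files to index, the same upload call can run in a loop; a minimal Python sketch reusing `upload_file` from above (the file names are illustrative):

```python
# Upload a batch of local files and collect their IDs for the
# "Add files to index" step below. File names are illustrative.
file_ids = []
for path in ['invoice-01.pdf', 'invoice-02.pdf']:
    with open(path, 'rb') as f:
        uploaded = client.files.upload_file(upload_file=f)
    file_ids.append(uploaded.id)
```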
### Configure data sources

Python (sync):

```python
from llama_cloud.types import CloudS3DataSource

ds = {
    'name': 's3',
    'source_type': 'S3',
    'component': CloudS3DataSource(bucket='test-bucket'),
}
data_source = client.data_sources.create_data_source(request=ds)
```

Python (async):

```python
from llama_cloud.types import CloudS3DataSource

ds = {
    'name': 's3',
    'source_type': 'S3',
    'component': CloudS3DataSource(bucket='test-bucket'),
}
data_source = await async_client.data_sources.create_data_source(request=ds)
```

TypeScript:

```typescript
const s3 = {
  name: 's3',
  sourceType: 'S3',
  component: { bucket: 'test-bucket' },
};
const dataSource = await client.dataSources.createDataSource({ projectId: projectId, body: s3 });
```
### Configure data sinks

Python (sync):

```python
from llama_cloud.types import CloudPineconeVectorStore

ds = {
    'name': 'pinecone',
    'sink_type': 'PINECONE',
    'component': CloudPineconeVectorStore(api_key='test-key', index_name='test-index'),
}
data_sink = client.data_sinks.create_data_sink(request=ds)
```

Python (async):

```python
from llama_cloud.types import CloudPineconeVectorStore

ds = {
    'name': 'pinecone',
    'sink_type': 'PINECONE',
    'component': CloudPineconeVectorStore(api_key='test-key', index_name='test-index'),
}
data_sink = await async_client.data_sinks.create_data_sink(request=ds)
```

TypeScript:

```typescript
const pinecone = {
  name: 'pinecone',
  sinkType: 'PINECONE',
  component: { api_key: 'test-key', index_name: 'test-index' },
};
const dataSink = await client.dataSinks.createDataSink({ projectId: projectId, body: pinecone });
```
### Setup transformation and embedding config

```python
# Embedding config
embedding_config = {
    'type': 'OPENAI_EMBEDDING',
    'component': {
        'api_key': '<YOUR_API_KEY_HERE>',  # editable
        'model_name': 'text-embedding-ada-002',  # editable
    },
}

# Transformation auto config
transform_config = {
    'mode': 'auto',
    'config': {
        'chunk_size': 1024,  # editable
        'chunk_overlap': 20,  # editable
    },
}
```
### Create index (i.e. pipeline)

Python (sync):

```python
pipeline = {
    'name': 'test-pipeline',
    'embedding_config': embedding_config,
    'transform_config': transform_config,
    'data_sink_id': data_sink.id,
}
pipeline = client.pipelines.upsert_pipeline(request=pipeline)
```

Python (async):

```python
pipeline = {
    'name': 'test-pipeline',
    'embedding_config': embedding_config,
    'transform_config': transform_config,
    'data_sink_id': data_sink.id,
}
pipeline = await async_client.pipelines.upsert_pipeline(request=pipeline)
```

TypeScript:

```typescript
const pipeline = {
  name: 'test-pipeline',
  embedding_config: embedding_config,
  transform_config: transform_config,
  dataSinkId: dataSink.id,
};
await client.pipelines.upsertPipeline({ projectId: projectId, body: pipeline });
```
### Add files to index

Python (sync):

```python
files = [
    {
        'file_id': file.id,
        'custom_metadata': {
            'document_type': 'INVOICE',  # Optional: an example of adding custom metadata to your files
        },
    }
]
pipeline_files = client.pipelines.add_files_to_pipeline(pipeline.id, request=files)
```

Python (async):

```python
files = [
    {
        'file_id': file.id,
        'custom_metadata': {
            'document_type': 'INVOICE',  # Optional: an example of adding custom metadata to your files
        },
    }
]
pipeline_files = await async_client.pipelines.add_files_to_pipeline(pipeline.id, request=files)
```

TypeScript:

```typescript
const files = [
  {
    file_id: file.id,
    custom_metadata: {
      document_type: 'INVOICE', // Optional: an example of adding custom metadata to your files
    },
  },
];
const pipelineFiles = await client.pipelines.addFilesToPipeline(pipeline.id, files);
```
### Add data sources to index

Python (sync):

```python
data_sources = [
    {
        'data_source_id': data_source.id,
        'sync_interval': 43200.0,  # Optional: scheduled sync frequency in seconds (here, every 12 hours)
    }
]
pipeline_data_sources = client.pipelines.add_data_sources_to_pipeline(pipeline.id, request=data_sources)
```

Python (async):

```python
data_sources = [
    {
        'data_source_id': data_source.id,
        'sync_interval': 43200.0,  # Optional: scheduled sync frequency in seconds (here, every 12 hours)
    }
]
pipeline_data_sources = await async_client.pipelines.add_data_sources_to_pipeline(pipeline.id, request=data_sources)
```

TypeScript:

```typescript
const dataSources = [
  {
    data_source_id: dataSource.id,
    sync_interval: 43200.0, // Optional: scheduled sync frequency in seconds (here, every 12 hours)
  },
];
const pipelineDataSources = await client.pipelines.addDataSourcesToPipeline(pipeline.id, dataSources);
```
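Besides the scheduled `sync_interval`, ingestion can be triggered on demand; a minimal Python sketch, assuming the client exposes `sync_pipeline` for the pipeline sync endpoint (verify the method name against your client version):

```python
# Assumption: sync_pipeline kicks off an immediate ingestion run
# over the pipeline's attached data sources.
job = client.pipelines.sync_pipeline(pipeline.id)
```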
### Add documents to index

Python (sync):

```python
from llama_cloud.types import CloudDocumentCreate

documents = [CloudDocumentCreate(text='test-text', metadata={'test-key': 'test-val'})]
documents = client.pipelines.create_batch_pipeline_documents(pipeline.id, request=documents)
```

Python (async):

```python
from llama_cloud.types import CloudDocumentCreate

documents = [CloudDocumentCreate(text='test-text', metadata={'test-key': 'test-val'})]
documents = await async_client.pipelines.create_batch_pipeline_documents(pipeline.id, request=documents)
```

TypeScript:

```typescript
const documents = [
  {
    text: 'test-text',
    metadata: { 'test-key': 'test-val' },
  },
];
const createdDocuments = await client.pipelines.createBatchPipelineDocuments(pipeline.id, documents);
```
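To verify what was ingested, you can read documents back; a minimal Python sketch, assuming the client exposes `list_pipeline_documents` for the pipeline documents endpoint (method and field names may differ by version):

```python
# Assumption: list_pipeline_documents returns the documents currently
# in the pipeline, each carrying an id and metadata.
docs = client.pipelines.list_pipeline_documents(pipeline.id)
for d in docs:
    print(d.id, d.metadata)
```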
## Observe ingestion status & history

### Get index status

Python (sync):

```python
status = client.pipelines.get_pipeline_status(pipeline.id)
```

Python (async):

```python
status = await async_client.pipelines.get_pipeline_status(pipeline.id)
```

TypeScript:

```typescript
const status = await client.pipelines.getPipelineStatus(pipeline.id);
```
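Ingestion runs asynchronously, so you may want to poll until it finishes; a minimal Python sketch, assuming the status response exposes a state string on `.status` (exact values may differ by client version):

```python
import time

# Poll until ingestion reaches a terminal state.
# Assumption: the status response exposes a state string on `.status`
# (e.g. 'SUCCESS' or 'ERROR'); exact values may differ by client version.
while True:
    status = client.pipelines.get_pipeline_status(pipeline.id)
    if status.status in ('SUCCESS', 'ERROR'):
        break
    time.sleep(5)
```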
### Get ingestion job history

Python (sync):

```python
jobs = client.pipelines.list_pipeline_jobs(pipeline.id)
```

Python (async):

```python
jobs = await async_client.pipelines.list_pipeline_jobs(pipeline.id)
```

TypeScript:

```typescript
const jobs = await client.pipelines.listPipelineJobs(pipeline.id);
```
## Run search (i.e. retrieval endpoint)

Python (sync):

```python
results = client.pipelines.run_search(pipeline.id, query='test-query')
```

Python (async):

```python
results = await async_client.pipelines.run_search(pipeline.id, query='test-query')
```

TypeScript:

```typescript
const results = await client.pipelines.runSearch(pipeline.id, { query: 'test-query' });
```
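To inspect what came back, you can iterate over the retrieved nodes; a minimal Python sketch, assuming the response exposes a `retrieval_nodes` list whose entries carry a score and a node with text (field names are assumptions; verify against your client version):

```python
# Assumption: results.retrieval_nodes holds scored nodes; each entry's
# .node.text is the matched chunk. Field names may differ by version.
for retrieved in results.retrieval_nodes:
    print(retrieved.score, retrieved.node.text[:100])
```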