Welcome to LlamaParse
Documentation for LlamaParse, the enterprise agentic OCR and document agent platform.
Build document agents powered by agentic OCR
LlamaParse is the enterprise platform to go from raw documents to production document agents: agentic OCR and parsing, structured extraction, searchable knowledge, and deployable agents—all in one platform.
Sign up to start building today!
Install
Section titled “Install”

Python:

```bash
pip install "llama-cloud>=1.0"
```

TypeScript:

```bash
npm install @llamaindex/llama-cloud
```

Set your API key:

```bash
export LLAMA_CLOUD_API_KEY=llx-...
```

Get an API key from the LlamaCloud dashboard.
Quick Start
Section titled “Quick Start”

Agentic OCR and parsing for 130+ formats. Turn PDFs and scans into LLM-ready text—the foundation for document agents.
```python
from llama_cloud import AsyncLlamaCloud
import asyncio

async def main():
    client = AsyncLlamaCloud()  # Uses LLAMA_CLOUD_API_KEY env var

    # Upload and parse a document
    file = await client.files.create(file="document.pdf", purpose="parse")
    result = await client.parsing.parse(
        file_id=file.id,
        tier="agentic",
        version="latest",
        expand=["markdown"],
    )

    # Get markdown output
    print(result.markdown.pages[0].markdown)

asyncio.run(main())
```

Structured data from documents with custom schemas. Feed agents with clean entities, tables, and fields.
```python
from pydantic import BaseModel, Field
from llama_cloud import LlamaCloud

# Define your schema
class Resume(BaseModel):
    name: str = Field(description="Full name of candidate")
    email: str = Field(description="Email address")
    skills: list[str] = Field(description="Technical skills")

client = LlamaCloud()

# Create extraction agent with schema
agent = client.extraction.extraction_agents.create(
    name="resume-parser",
    data_schema=Resume.model_json_schema(),
    config={},
)

# Upload and extract
file = client.files.create(file="resume.pdf", purpose="extract")
result = client.extraction.jobs.extract(
    extraction_agent_id=agent.id,
    file_id=file.id,
)
print(result.data)
```

Ingest, chunk, and embed into searchable indexes. Power RAG and retrieval for document agents. Index is designed for UI-first setup with SDK integration. Start in the LlamaCloud dashboard to create your index, then integrate:
```python
from llama_cloud import LlamaCloud

client = LlamaCloud()  # Uses LLAMA_CLOUD_API_KEY env var

# Retrieve relevant nodes from the index
results = client.pipelines.retrieve(
    pipeline_id="your-pipeline-id",
    query="Your query here",
    # -- Customize search behavior --
    # dense_similarity_top_k=20,
    # sparse_similarity_top_k=20,
    # alpha=0.5,
    # -- Control reranking behavior --
    # enable_reranking=True,
    # rerank_top_n=5,
)

for n in results.retrieval_nodes:
    print(f"Score: {n.score}, Text: {n.node.text}")
```

Categorize documents with natural-language rules. Pre-processing for extraction, parsing, or indexing.
```python
from llama_cloud import LlamaCloud

client = LlamaCloud()

# Upload a document
file = client.files.create(file="document.pdf", purpose="classify")

# Classify with natural language rules
result = client.classifier.classify(
    file_ids=[file.id],
    rules=[
        {
            "type": "invoice",
            "description": "Documents with invoice numbers, line items, and totals",
        },
        {
            "type": "receipt",
            "description": "Short POS receipts with merchant and total",
        },
        {
            "type": "contract",
            "description": "Legal agreements with terms and signatures",
        },
    ],
    mode="FAST",  # or "MULTIMODAL" for visual docs
)

for item in result.items:
    print(f"Type: {item.result.type}, Confidence: {item.result.confidence}")
```

Segment concatenated PDFs into logical sections. AI-powered classification to split combined documents.
```python
from llama_cloud import LlamaCloud

client = LlamaCloud()

# Upload a combined PDF
file = client.files.create(file="combined.pdf", purpose="split")

# Split into logical sections
# (sync client: no `await`, matching the other synchronous examples)
result = client.beta.split.split(
    categories=[
        {
            "name": "invoice",
            "description": "Commercial document with line items and totals",
        },
        {
            "name": "contract",
            "description": "Legal agreement with terms and signatures",
        },
    ],
    document_input={"type": "file_id", "value": file.id},
)

for segment in result.result.segments:
    print(f"Pages {segment.pages}: {segment.category} ({segment.confidence_category})")
```

Extract tables and metadata from messy spreadsheets. Output as Parquet files with rich cell metadata.
```python
from llama_cloud import LlamaCloud

client = LlamaCloud()

# Upload a spreadsheet
file = client.files.create(file="spreadsheet.xlsx", purpose="parse")

# Extract tables and regions
result = client.beta.sheets.parse(
    file_id=file.id,
    config={"generate_additional_metadata": True},
)

# Print extracted regions
print(f"Found {len(result.regions)} regions")
for region in result.regions:
    print(f"  - {region.region_id}: {region.title} ({region.location})")
```

Agentic OCR and parsing for 130+ formats. Turn PDFs and scans into LLM-ready text—the foundation for document agents.
```typescript
import LlamaCloud from '@llamaindex/llama-cloud';
import fs from 'fs';

const client = new LlamaCloud(); // Uses LLAMA_CLOUD_API_KEY env var

// Upload and parse a document
const file = await client.files.create({
  file: fs.createReadStream('document.pdf'),
  purpose: 'parse',
});
const result = await client.parsing.parse({
  file_id: file.id,
  tier: 'agentic',
  version: 'latest',
  expand: ['markdown'],
});

// Get markdown output
console.log(result.markdown.pages[0].markdown);
```

Structured data from documents with custom schemas. Feed agents with clean entities, tables, and fields.
```typescript
import LlamaCloud from '@llamaindex/llama-cloud';
import { z } from 'zod';
import fs from 'fs';

// Define your schema with Zod
const ResumeSchema = z.object({
  name: z.string().describe('Full name of candidate'),
  email: z.string().describe('Email address'),
  skills: z.array(z.string()).describe('Technical skills'),
});

const client = new LlamaCloud();

// Create extraction agent with schema
const agent = await client.extraction.extractionAgents.create({
  name: 'resume-parser',
  dataSchema: ResumeSchema,
  config: {},
});

// Upload and extract
const file = await client.files.create({
  file: fs.createReadStream('resume.pdf'),
  purpose: 'extract',
});
const result = await client.extraction.jobs.extract({
  extraction_agent_id: agent.id,
  file_id: file.id,
});
console.log(result.data);
```

Ingest, chunk, and embed into searchable indexes. Power RAG and retrieval for document agents. Index is designed for UI-first setup with SDK integration. Start in the LlamaCloud dashboard to create your index, then integrate:
```typescript
import LlamaCloud from '@llamaindex/llama-cloud';

const client = new LlamaCloud(); // Uses LLAMA_CLOUD_API_KEY env var

// Retrieve relevant nodes from the index
const results = await client.pipelines.retrieve('your-pipeline-id', {
  query: 'Your query here',
  // -- Customize search behavior --
  // dense_similarity_top_k: 20,
  // sparse_similarity_top_k: 20,
  // alpha: 0.5,
  // -- Control reranking behavior --
  // enable_reranking: true,
  // rerank_top_n: 5,
});

for (const node of results.retrieval_nodes || []) {
  console.log(`Score: ${node.score}, Text: ${node.node?.text}`);
}
```

Categorize documents with natural-language rules. Pre-processing for extraction, parsing, or indexing.
```typescript
import LlamaCloud from '@llamaindex/llama-cloud';
import fs from 'fs';

const client = new LlamaCloud();

// Upload a document
const file = await client.files.create({
  file: fs.createReadStream('document.pdf'),
  purpose: 'classify',
});

// Classify with natural language rules
const result = await client.classifier.classify({
  file_ids: [file.id],
  rules: [
    {
      type: 'invoice',
      description: 'Documents with invoice numbers, line items, and totals',
    },
    {
      type: 'receipt',
      description: 'Short POS receipts with merchant and total',
    },
    {
      type: 'contract',
      description: 'Legal agreements with terms and signatures',
    },
  ],
  mode: 'FAST', // or 'MULTIMODAL' for visual docs
});

for (const item of result.items) {
  if (item.result) {
    console.log(`Type: ${item.result.type}, Confidence: ${item.result.confidence}`);
  }
}
```

Segment concatenated PDFs into logical sections. AI-powered classification to split combined documents.
```typescript
import LlamaCloud from '@llamaindex/llama-cloud';
import fs from 'fs';

const client = new LlamaCloud();

// Upload a combined PDF
const file = await client.files.create({
  file: fs.createReadStream('combined.pdf'),
  purpose: 'split',
});

// Split into logical sections
const result = await client.beta.split.split({
  categories: [
    {
      name: 'invoice',
      description: 'Commercial document with line items and totals',
    },
    {
      name: 'contract',
      description: 'Legal agreement with terms and signatures',
    },
  ],
  document_input: { type: 'file_id', value: file.id },
});

for (const segment of result.result.segments) {
  console.log(`Pages ${segment.pages}: ${segment.category} (${segment.confidence_category})`);
}
```

Extract tables and metadata from messy spreadsheets. Output as Parquet files with rich cell metadata.
```typescript
import LlamaCloud from '@llamaindex/llama-cloud';
import fs from 'fs';

const client = new LlamaCloud();

// Upload a spreadsheet
const file = await client.files.create({
  file: fs.createReadStream('spreadsheet.xlsx'),
  purpose: 'parse',
});

// Extract tables and regions
const result = await client.beta.sheets.parse({
  file_id: file.id,
  config: { generate_additional_metadata: true },
});

// Print extracted regions
console.log(`Found ${result.regions?.length || 0} regions`);
for (const region of result.regions || []) {
  console.log(`  - ${region.region_id}: ${region.title} (${region.location})`);
}
```

Resources
Section titled “Resources”

- Web UI - Visual interface for all products
- Python SDK - `pip install llama-cloud`
- TypeScript SDK - `npm install @llamaindex/llama-cloud`
- API Reference