Welcome to LlamaParse
Build document agents powered by agentic OCR
LlamaParse is the enterprise platform to go from raw documents to production document agents: agentic OCR and parsing, structured extraction, searchable knowledge, and deployable agents—all in one platform.
Sign up to start building today!
Install
Section titled “Install”. Python: pip install llama-cloud>=1.0. TypeScript: npm install @llamaindex/llama-cloud. Set your API key:
export LLAMA_CLOUD_API_KEY=llx-... Get an API key from the LlamaCloud dashboard.
Quick Start
Section titled “Quick Start”Agentic OCR and parsing for 130+ formats. Turn PDFs and scans into LLM-ready text—the foundation for document agents.
from llama_cloud import AsyncLlamaCloudimport asyncio
async def main(): client = AsyncLlamaCloud() # Uses LLAMA_CLOUD_API_KEY env var
# Upload and parse a document file = await client.files.create(file="document.pdf", purpose="parse") result = await client.parsing.parse( file_id=file.id, tier="agentic", version="latest", expand=["markdown"], )
# Get markdown output print(result.markdown.pages[0].markdown)
asyncio.run(main())Structured data from documents with custom schemas. Feed agents with clean entities, tables, and fields.
from pydantic import BaseModel, Fieldfrom llama_cloud import LlamaCloud
# Define your schemaclass Resume(BaseModel): name: str = Field(description="Full name of candidate") email: str = Field(description="Email address") skills: list[str] = Field(description="Technical skills")
client = LlamaCloud()
# Create extraction agent with schemaagent = client.extraction.extraction_agents.create( name="resume-parser", data_schema=Resume.model_json_schema(), config={})
# Upload and extractfile = client.files.create(file="resume.pdf", purpose="extract")result = client.extraction.jobs.extract( extraction_agent_id=agent.id, file_id=file.id,)print(result.data)Ingest, chunk, and embed into searchable indexes. Power RAG and retrieval for document agents. Index is designed for UI-first setup with SDK integration. Start in the LlamaCloud dashboard to create your index, then integrate:
from llama_cloud import LlamaCloud
client = LlamaCloud() # Uses LLAMA_CLOUD_API_KEY env var
# Retrieve relevant nodes from the indexresults = client.pipelines.retrieve( pipeline_id="your-pipeline-id", query="Your query here", # -- Customize search behavior -- # dense_similarity_top_k=20, # sparse_similarity_top_k=20, # alpha=0.5, # -- Control reranking behavior -- # enable_reranking=True, # rerank_top_n=5,)
for n in results.retrieval_nodes: print(f"Score: {n.score}, Text: {n.node.text}")Categorize documents with natural-language rules. Pre-processing for extraction, parsing, or indexing.
from llama_cloud import LlamaCloud
client = LlamaCloud()
# Upload a documentfile = client.files.create(file="document.pdf", purpose="classify")
# Classify with natural language rulesresult = client.classifier.classify( file_ids=[file.id], rules=[ { "type": "invoice", "description": "Documents with invoice numbers, line items, and totals" }, { "type": "receipt", "description": "Short POS receipts with merchant and total" }, { "type": "contract", "description": "Legal agreements with terms and signatures" }, ], mode="FAST", # or "MULTIMODAL" for visual docs)
for item in result.items: print(f"Type: {item.result.type}, Confidence: {item.result.confidence}")Segment concatenated PDFs into logical sections. AI-powered classification to split combined documents.
from llama_cloud import LlamaCloud
client = LlamaCloud()
# Upload a combined PDFfile = client.files.create(file="combined.pdf", purpose="split")
# Split into logical sectionsresult = client.beta.split.split( categories=[ { "name": "invoice", "description": "Commercial document with line items and totals" }, { "name": "contract", "description": "Legal agreement with terms and signatures" }, ], document_input={"type": "file_id", "value": file.id},)
for segment in result.result.segments: print(f"Pages {segment.pages}: {segment.category} ({segment.confidence_category})")Extract tables and metadata from messy spreadsheets. Output as Parquet files with rich cell metadata.
from llama_cloud import LlamaCloud
client = LlamaCloud()
# Upload a spreadsheetfile = client.files.create(file="spreadsheet.xlsx", purpose="parse")
# Extract tables and regionsresult = client.beta.sheets.parse( file_id=file.id, config={"generate_additional_metadata": True},)
# Print extracted regionsprint(f"Found {len(result.regions)} regions")for region in result.regions: print(f" - {region.region_id}: {region.title} ({region.location})")Agentic OCR and parsing for 130+ formats. Turn PDFs and scans into LLM-ready text—the foundation for document agents.
import LlamaCloud from '@llamaindex/llama-cloud';import fs from 'fs';
const client = new LlamaCloud(); // Uses LLAMA_CLOUD_API_KEY env var
// Upload and parse a documentconst file = await client.files.create({ file: fs.createReadStream('document.pdf'), purpose: 'parse',});const result = await client.parsing.parse({ file_id: file.id, tier: 'agentic', version: 'latest', expand: ['markdown']});
// Get markdown outputconsole.log(result.markdown.pages[0].markdown);Structured data from documents with custom schemas. Feed agents with clean entities, tables, and fields.
import LlamaCloud from '@llamaindex/llama-cloud';import { z } from 'zod';import fs from 'fs';
// Define your schema with Zodconst ResumeSchema = z.object({ name: z.string().describe('Full name of candidate'), email: z.string().describe('Email address'), skills: z.array(z.string()).describe('Technical skills'),});
const client = new LlamaCloud();
// Create extraction agent with schemaconst agent = await client.extraction.extractionAgents.create({ name: 'resume-parser', dataSchema: ResumeSchema, config: {},});
// Upload and extractconst file = await client.files.create({ file: fs.createReadStream('resume.pdf'), purpose: 'extract',});const result = await client.extraction.jobs.extract({ extraction_agent_id: agent.id, file_id: file.id,});console.log(result.data);Ingest, chunk, and embed into searchable indexes. Power RAG and retrieval for document agents. Index is designed for UI-first setup with SDK integration. Start in the LlamaCloud dashboard to create your index, then integrate:
import LlamaCloud from '@llamaindex/llama-cloud';
const client = new LlamaCloud(); // Uses LLAMA_CLOUD_API_KEY env var
// Retrieve relevant nodes from the indexconst results = await client.pipelines.retrieve('your-pipeline-id', { query: 'Your query here', // -- Customize search behavior -- // dense_similarity_top_k: 20, // sparse_similarity_top_k: 20, // alpha: 0.5, // -- Control reranking behavior -- // enable_reranking: true, // rerank_top_n: 5,});
for (const node of results.retrieval_nodes || []) { console.log(`Score: ${node.score}, Text: ${node.node?.text}`);}Categorize documents with natural-language rules. Pre-processing for extraction, parsing, or indexing.
import LlamaCloud from '@llamaindex/llama-cloud';import fs from 'fs';
const client = new LlamaCloud();
// Upload a documentconst file = await client.files.create({ file: fs.createReadStream('document.pdf'), purpose: 'classify',});
// Classify with natural language rulesconst result = await client.classifier.classify({ file_ids: [file.id], rules: [ { type: 'invoice', description: 'Documents with invoice numbers, line items, and totals', }, { type: 'receipt', description: 'Short POS receipts with merchant and total', }, { type: 'contract', description: 'Legal agreements with terms and signatures', }, ], mode: 'FAST', // or 'MULTIMODAL' for visual docs});
for (const item of result.items) { if (item.result) { console.log(`Type: ${item.result.type}, Confidence: ${item.result.confidence}`); }}Segment concatenated PDFs into logical sections. AI-powered classification to split combined documents.
import LlamaCloud from '@llamaindex/llama-cloud';import fs from 'fs';
const client = new LlamaCloud();
// Upload a combined PDFconst file = await client.files.create({ file: fs.createReadStream('combined.pdf'), purpose: 'split',});
// Split into logical sectionsconst result = await client.beta.split.split({ categories: [ { name: 'invoice', description: 'Commercial document with line items and totals', }, { name: 'contract', description: 'Legal agreement with terms and signatures', }, ], document_input: { type: 'file_id', value: file.id },});
for (const segment of result.result.segments) { console.log(`Pages ${segment.pages}: ${segment.category} (${segment.confidence_category})`);}Extract tables and metadata from messy spreadsheets. Output as Parquet files with rich cell metadata.
import LlamaCloud from '@llamaindex/llama-cloud';import fs from 'fs';
const client = new LlamaCloud();
// Upload a spreadsheetconst file = await client.files.create({ file: fs.createReadStream('spreadsheet.xlsx'), purpose: 'parse',});
// Extract tables and regionsconst result = await client.beta.sheets.parse({ file_id: file.id, config: { generate_additional_metadata: true },});
// Print extracted regionsconsole.log(`Found ${result.regions?.length || 0} regions`);for (const region of result.regions || []) { console.log(` - ${region.region_id}: ${region.title} (${region.location})`);}Resources
Section titled “Resources”- Web UI - Visual interface for all products
- Python SDK - pip install llama-cloud
- TypeScript SDK - npm install @llamaindex/llama-cloud
- API Reference