Skip to content

Beta

Beta > Agent Data

Get Agent Data
beta.agent_data.get(str item_id, AgentDataGetParams **kwargs) -> AgentData
GET /api/v1/beta/agent-data/{item_id}
Update Agent Data
beta.agent_data.update(str item_id, AgentDataUpdateParams **kwargs) -> AgentData
PUT /api/v1/beta/agent-data/{item_id}
Delete Agent Data
beta.agent_data.delete(str item_id, AgentDataDeleteParams **kwargs) -> AgentDataDeleteResponse
DELETE /api/v1/beta/agent-data/{item_id}
Create Agent Data
beta.agent_data.create(AgentDataCreateParams **kwargs) -> AgentData
POST /api/v1/beta/agent-data
Search Agent Data
beta.agent_data.search(AgentDataSearchParams **kwargs) -> SyncPaginatedCursorPost[AgentData]
POST /api/v1/beta/agent-data/:search
Aggregate Agent Data
beta.agent_data.aggregate(AgentDataAggregateParams **kwargs) -> SyncPaginatedCursorPost[AgentDataAggregateResponse]
POST /api/v1/beta/agent-data/:aggregate
Delete Agent Data By Query
beta.agent_data.delete_by_query(AgentDataDeleteByQueryParams **kwargs) -> AgentDataDeleteByQueryResponse
POST /api/v1/beta/agent-data/:delete
Models
class AgentData:

API Result for a single agent data item

data: Dict[str, object]
deployment_name: str
id: Optional[str]
collection: Optional[str]
created_at: Optional[datetime]
project_id: Optional[str]
updated_at: Optional[datetime]
Dict[str, str]
class AgentDataAggregateResponse:

API Result for a single group in the aggregate response

group_key: Dict[str, object]
count: Optional[int]
first_item: Optional[Dict[str, object]]
class AgentDataDeleteByQueryResponse:

API response for bulk delete operation

deleted_count: int

Beta > Sheets

Create Spreadsheet Job
beta.sheets.create(SheetCreateParams **kwargs) -> SheetsJob
POST /api/v1/beta/sheets/jobs
List Spreadsheet Jobs
beta.sheets.list(SheetListParams **kwargs) -> SyncPaginatedCursor[SheetsJob]
GET /api/v1/beta/sheets/jobs
Get Spreadsheet Job
beta.sheets.get(str spreadsheet_job_id, SheetGetParams **kwargs) -> SheetsJob
GET /api/v1/beta/sheets/jobs/{spreadsheet_job_id}
Get Result Region
beta.sheets.get_result_table(Literal["table", "extra", "cell_metadata"] region_type, SheetGetResultTableParams **kwargs) -> PresignedURL
GET /api/v1/beta/sheets/jobs/{spreadsheet_job_id}/regions/{region_id}/result/{region_type}
Delete Spreadsheet Job
beta.sheets.delete_job(str spreadsheet_job_id, SheetDeleteJobParams **kwargs) -> object
DELETE /api/v1/beta/sheets/jobs/{spreadsheet_job_id}
Models
class SheetsJob:

A spreadsheet parsing job

id: str

The ID of the job

Configuration for the parsing job

extraction_range: Optional[str]

A1 notation of the range to extract a single region from. If None, the entire sheet is used.

flatten_hierarchical_tables: Optional[bool]

Return a flattened dataframe when a detected table is recognized as hierarchical.

generate_additional_metadata: Optional[bool]

Whether to generate additional metadata (title, description) for each extracted region.

include_hidden_cells: Optional[bool]

Whether to include hidden cells when extracting regions from the spreadsheet.

sheet_names: Optional[List[str]]

The names of the sheets to extract regions from. If empty, all sheets will be processed.

specialization: Optional[str]

Optional specialization mode for domain-specific extraction. Supported values: 'financial-standard', 'financial-enhanced', 'financial-precise'. Default None uses the general-purpose pipeline.

table_merge_sensitivity: Optional[Literal["strong", "weak"]]

Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).

One of the following:
"strong"
"weak"
use_experimental_processing: Optional[bool]

Enables experimental processing. Accuracy may be impacted.

created_at: str

When the job was created

file_id: Optional[str]

The ID of the input file

formatuuid
project_id: str

The ID of the project

formatuuid
status: StatusEnum

The status of the parsing job

One of the following:
"PENDING"
"SUCCESS"
"ERROR"
"PARTIAL_SUCCESS"
"CANCELLED"
updated_at: str

When the job was last updated

user_id: str

The ID of the user

errors: Optional[List[str]]

Any errors encountered

Deprecated — file: Optional[File]

Schema for a file.

id: str

Unique identifier

formatuuid
name: str
project_id: str

The ID of the project that the file belongs to

formatuuid
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

The ID of the data source that the file belongs to

formatuuid
expires_at: Optional[datetime]

The expiration date for the file. Files past this date can be deleted.

formatdate-time
external_file_id: Optional[str]

The ID of the file in the external system

file_size: Optional[int]

Size of the file in bytes

minimum0
file_type: Optional[str]

File type (e.g. pdf, docx, etc.)

maxLength3000
minLength1
last_modified_at: Optional[datetime]

The last modified time of the file

formatdate-time
permission_info: Optional[Dict[str, Union[Dict[str, object], List[object], str, 3 more]]]

Permission information for the file

One of the following:
Dict[str, object]
List[object]
str
float
bool
purpose: Optional[str]

The intended purpose of the file (e.g., 'user_data', 'parse', 'extract', 'split', 'classify')

resource_info: Optional[Dict[str, Union[Dict[str, object], List[object], str, 3 more]]]

Resource information for the file

One of the following:
Dict[str, object]
List[object]
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
regions: Optional[List[Region]]

All extracted regions (populated when job is complete)

location: str

Location of the region in the spreadsheet

region_type: str

Type of the extracted region

sheet_name: str

Worksheet name where region was found

description: Optional[str]

Generated description for the region

region_id: Optional[str]

Unique identifier for this region within the file

title: Optional[str]

Generated title for the region

success: Optional[bool]

Whether the job completed successfully

worksheet_metadata: Optional[List[WorksheetMetadata]]

Metadata for each processed worksheet (populated when job is complete)

sheet_name: str

Name of the worksheet

description: Optional[str]

Generated description of the worksheet

title: Optional[str]

Generated title for the worksheet

class SheetsParsingConfig:

Configuration for spreadsheet parsing and region extraction

extraction_range: Optional[str]

A1 notation of the range to extract a single region from. If None, the entire sheet is used.

flatten_hierarchical_tables: Optional[bool]

Return a flattened dataframe when a detected table is recognized as hierarchical.

generate_additional_metadata: Optional[bool]

Whether to generate additional metadata (title, description) for each extracted region.

include_hidden_cells: Optional[bool]

Whether to include hidden cells when extracting regions from the spreadsheet.

sheet_names: Optional[List[str]]

The names of the sheets to extract regions from. If empty, all sheets will be processed.

specialization: Optional[str]

Optional specialization mode for domain-specific extraction. Supported values: 'financial-standard', 'financial-enhanced', 'financial-precise'. Default None uses the general-purpose pipeline.

table_merge_sensitivity: Optional[Literal["strong", "weak"]]

Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).

One of the following:
"strong"
"weak"
use_experimental_processing: Optional[bool]

Enables experimental processing. Accuracy may be impacted.

Beta > Directories

Create Directory
beta.directories.create(DirectoryCreateParams **kwargs) -> DirectoryCreateResponse
POST /api/v1/beta/directories
List Directories
beta.directories.list(DirectoryListParams **kwargs) -> SyncPaginatedCursor[DirectoryListResponse]
GET /api/v1/beta/directories
Get Directory
beta.directories.get(str directory_id, DirectoryGetParams **kwargs) -> DirectoryGetResponse
GET /api/v1/beta/directories/{directory_id}
Update Directory
beta.directories.update(str directory_id, DirectoryUpdateParams **kwargs) -> DirectoryUpdateResponse
PATCH/api/v1/beta/directories/{directory_id}
Delete Directory
beta.directories.delete(str directory_id, DirectoryDeleteParams **kwargs)
DELETE /api/v1/beta/directories/{directory_id}
Models
class DirectoryCreateResponse:

API response schema for a directory.

id: str

Unique identifier for the directory.

name: str

Human-readable name for the directory.

minLength1
project_id: str

Project the directory belongs to.

created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source id the directory syncs from. Null if just manual uploads.

deleted_at: Optional[datetime]

Optional timestamp of when the directory was deleted. Null if not deleted.

formatdate-time
description: Optional[str]

Optional description shown to users.

updated_at: Optional[datetime]

Update datetime

formatdate-time
class DirectoryListResponse:

API response schema for a directory.

id: str

Unique identifier for the directory.

name: str

Human-readable name for the directory.

minLength1
project_id: str

Project the directory belongs to.

created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source id the directory syncs from. Null if just manual uploads.

deleted_at: Optional[datetime]

Optional timestamp of when the directory was deleted. Null if not deleted.

formatdate-time
description: Optional[str]

Optional description shown to users.

updated_at: Optional[datetime]

Update datetime

formatdate-time
class DirectoryGetResponse:

API response schema for a directory.

id: str

Unique identifier for the directory.

name: str

Human-readable name for the directory.

minLength1
project_id: str

Project the directory belongs to.

created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source id the directory syncs from. Null if just manual uploads.

deleted_at: Optional[datetime]

Optional timestamp of when the directory was deleted. Null if not deleted.

formatdate-time
description: Optional[str]

Optional description shown to users.

updated_at: Optional[datetime]

Update datetime

formatdate-time
class DirectoryUpdateResponse:

API response schema for a directory.

id: str

Unique identifier for the directory.

name: str

Human-readable name for the directory.

minLength1
project_id: str

Project the directory belongs to.

created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source id the directory syncs from. Null if just manual uploads.

deleted_at: Optional[datetime]

Optional timestamp of when the directory was deleted. Null if not deleted.

formatdate-time
description: Optional[str]

Optional description shown to users.

updated_at: Optional[datetime]

Update datetime

formatdate-time

Beta > Directories > Files

Add Directory File
beta.directories.files.add(str directory_id, FileAddParams **kwargs) -> FileAddResponse
POST /api/v1/beta/directories/{directory_id}/files
List Directory Files
beta.directories.files.list(str directory_id, FileListParams **kwargs) -> SyncPaginatedCursor[FileListResponse]
GET /api/v1/beta/directories/{directory_id}/files
Get Directory File
beta.directories.files.get(str directory_file_id, FileGetParams **kwargs) -> FileGetResponse
GET /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Update Directory File
beta.directories.files.update(str directory_file_id, FileUpdateParams **kwargs) -> FileUpdateResponse
PATCH /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Delete Directory File
beta.directories.files.delete(str directory_file_id, FileDeleteParams **kwargs)
DELETE /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Upload File To Directory
beta.directories.files.upload(str directory_id, FileUploadParams **kwargs) -> FileUploadResponse
POST /api/v1/beta/directories/{directory_id}/files/upload
Models
class FileAddResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
class FileListResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
class FileGetResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
class FileUpdateResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
class FileUploadResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time

Beta > Batch

Create Batch Job
beta.batch.create(BatchCreateParams **kwargs) -> BatchCreateResponse
POST /api/v1/beta/batch-processing
List Batch Jobs
beta.batch.list(BatchListParams **kwargs) -> SyncPaginatedBatchItems[BatchListResponse]
GET /api/v1/beta/batch-processing
Get Batch Job Status
beta.batch.get_status(str job_id, BatchGetStatusParams **kwargs) -> BatchGetStatusResponse
GET /api/v1/beta/batch-processing/{job_id}
Cancel Batch Job
beta.batch.cancel(str job_id, BatchCancelParams **kwargs) -> BatchCancelResponse
POST /api/v1/beta/batch-processing/{job_id}/cancel
Models
class BatchCreateResponse:

Response schema for a batch processing job.

id: str

Unique identifier for the batch job

job_type: Literal["parse", "extract", "classify"]

Type of processing operation (parse, extract, or classify)

One of the following:
"parse"
"extract"
"classify"
project_id: str

Project this job belongs to

status: Literal["pending", "running", "dispatched", 3 more]

Current job status

One of the following:
"pending"
"running"
"dispatched"
"completed"
"failed"
"cancelled"
total_items: int

Total number of items in the job

completed_at: Optional[datetime]

Timestamp when job completed

formatdate-time
created_at: Optional[datetime]

Creation datetime

formatdate-time
directory_id: Optional[str]

Directory being processed

effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

failed_items: Optional[int]

Number of items that failed processing

job_record_id: Optional[str]

The job record ID associated with this status, if any.

processed_items: Optional[int]

Number of items processed so far

skipped_items: Optional[int]

Number of items skipped (already processed or size limit)

started_at: Optional[datetime]

Timestamp when job processing started

formatdate-time
updated_at: Optional[datetime]

Update datetime

formatdate-time
workflow_id: Optional[str]

Async job tracking ID

class BatchListResponse:

Response schema for a batch processing job.

id: str

Unique identifier for the batch job

job_type: Literal["parse", "extract", "classify"]

Type of processing operation (parse, extract, or classify)

One of the following:
"parse"
"extract"
"classify"
project_id: str

Project this job belongs to

status: Literal["pending", "running", "dispatched", 3 more]

Current job status

One of the following:
"pending"
"running"
"dispatched"
"completed"
"failed"
"cancelled"
total_items: int

Total number of items in the job

completed_at: Optional[datetime]

Timestamp when job completed

formatdate-time
created_at: Optional[datetime]

Creation datetime

formatdate-time
directory_id: Optional[str]

Directory being processed

effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

failed_items: Optional[int]

Number of items that failed processing

job_record_id: Optional[str]

The job record ID associated with this status, if any.

processed_items: Optional[int]

Number of items processed so far

skipped_items: Optional[int]

Number of items skipped (already processed or size limit)

started_at: Optional[datetime]

Timestamp when job processing started

formatdate-time
updated_at: Optional[datetime]

Update datetime

formatdate-time
workflow_id: Optional[str]

Async job tracking ID

class BatchGetStatusResponse:

Detailed status response for a batch processing job.

job: Job

Response schema for a batch processing job.

id: str

Unique identifier for the batch job

job_type: Literal["parse", "extract", "classify"]

Type of processing operation (parse, extract, or classify)

One of the following:
"parse"
"extract"
"classify"
project_id: str

Project this job belongs to

status: Literal["pending", "running", "dispatched", 3 more]

Current job status

One of the following:
"pending"
"running"
"dispatched"
"completed"
"failed"
"cancelled"
total_items: int

Total number of items in the job

completed_at: Optional[datetime]

Timestamp when job completed

formatdate-time
created_at: Optional[datetime]

Creation datetime

formatdate-time
directory_id: Optional[str]

Directory being processed

effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

failed_items: Optional[int]

Number of items that failed processing

job_record_id: Optional[str]

The job record ID associated with this status, if any.

processed_items: Optional[int]

Number of items processed so far

skipped_items: Optional[int]

Number of items skipped (already processed or size limit)

started_at: Optional[datetime]

Timestamp when job processing started

formatdate-time
updated_at: Optional[datetime]

Update datetime

formatdate-time
workflow_id: Optional[str]

Async job tracking ID

progress_percentage: float

Percentage of items processed (0-100)

maximum100
minimum0
class BatchCancelResponse:

Response after cancelling a batch job.

job_id: str

ID of the cancelled job

message: str

Confirmation message

processed_items: int

Number of items processed before cancellation

status: Literal["pending", "running", "dispatched", 3 more]

New status (should be 'cancelled')

One of the following:
"pending"
"running"
"dispatched"
"completed"
"failed"
"cancelled"

Beta > Batch > Job Items

List Batch Job Items
beta.batch.job_items.list(str job_id, JobItemListParams **kwargs) -> SyncPaginatedBatchItems[JobItemListResponse]
GET /api/v1/beta/batch-processing/{job_id}/items
Get Item Processing Results
beta.batch.job_items.get_processing_results(str item_id, JobItemGetProcessingResultsParams **kwargs) -> JobItemGetProcessingResultsResponse
GET /api/v1/beta/batch-processing/items/{item_id}/processing-results
Models
class JobItemListResponse:

Detailed information about an item in a batch job.

item_id: str

ID of the item

item_name: str

Name of the item

status: Literal["pending", "processing", "completed", 3 more]

Processing status of this item

One of the following:
"pending"
"processing"
"completed"
"failed"
"skipped"
"cancelled"
completed_at: Optional[datetime]

When processing completed for this item

formatdate-time
effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

job_id: Optional[str]

Job ID for the underlying processing job (links to parse/extract job results)

job_record_id: Optional[str]

The job record ID associated with this status, if any.

skip_reason: Optional[str]

Reason item was skipped (e.g., 'already_processed', 'size_limit_exceeded')

started_at: Optional[datetime]

When processing started for this item

formatdate-time
class JobItemGetProcessingResultsResponse:

Response containing all processing results for an item.

item_id: str

ID of the source item

item_name: str

Name of the source item

processing_results: Optional[List[ProcessingResult]]

List of all processing operations performed on this item

item_id: str

Source item that was processed

job_config: ProcessingResultJobConfig

Job configuration used for processing

One of the following:
class ProcessingResultJobConfigBatchParseJobRecordCreate:

Batch-specific parse job record for batch processing.

This model contains the metadata and configuration for a batch parse job, but excludes file-specific information. It's used as input to the batch parent workflow and combined with DirectoryFile data to create full ParseJobRecordCreate instances for each file.

Attributes:
  job_name: Must be PARSE_RAW_FILE
  partitions: Partitions for job output location
  parameters: Generic parse configuration (BatchParseJobConfig)
  session_id: Upstream request ID for tracking
  correlation_id: Correlation ID for cross-service tracking
  parent_job_execution_id: Parent job execution ID if nested
  user_id: User who created the job
  project_id: Project this job belongs to
  webhook_url: Optional webhook URL for job completion notifications

correlation_id: Optional[str]

The correlation ID for this job. Used for tracking the job across services.

formatuuid
job_name: Optional[Literal["parse_raw_file_job"]]
parameters: Optional[ProcessingResultJobConfigBatchParseJobRecordCreateParameters]

Generic parse job configuration for batch processing.

This model contains the parsing configuration that applies to all files in a batch, but excludes file-specific fields like file_name, file_id, etc. Those file-specific fields are populated from DirectoryFile data when creating individual ParseJobRecordCreate instances for each file.

The fields in this model should be generic settings that apply uniformly to all files being processed in the batch.

adaptive_long_table: Optional[bool]
aggressive_table_extraction: Optional[bool]
auto_mode: Optional[bool]
auto_mode_configuration_json: Optional[str]
auto_mode_trigger_on_image_in_page: Optional[bool]
auto_mode_trigger_on_regexp_in_page: Optional[str]
auto_mode_trigger_on_table_in_page: Optional[bool]
auto_mode_trigger_on_text_in_page: Optional[str]
azure_openai_api_version: Optional[str]
azure_openai_deployment_name: Optional[str]
azure_openai_endpoint: Optional[str]
azure_openai_key: Optional[str]
bbox_bottom: Optional[float]
bbox_left: Optional[float]
bbox_right: Optional[float]
bbox_top: Optional[float]
bounding_box: Optional[str]
compact_markdown_table: Optional[bool]
complemental_formatting_instruction: Optional[str]
content_guideline_instruction: Optional[str]
continuous_mode: Optional[bool]
custom_metadata: Optional[Dict[str, object]]

The custom metadata to attach to the documents.

disable_image_extraction: Optional[bool]
disable_ocr: Optional[bool]
disable_reconstruction: Optional[bool]
do_not_cache: Optional[bool]
do_not_unroll_columns: Optional[bool]
enable_cost_optimizer: Optional[bool]
extract_charts: Optional[bool]
extract_layout: Optional[bool]
extract_printed_page_number: Optional[bool]
fast_mode: Optional[bool]
formatting_instruction: Optional[str]
gpt4o_api_key: Optional[str]
gpt4o_mode: Optional[bool]
guess_xlsx_sheet_name: Optional[bool]
hide_footers: Optional[bool]
hide_headers: Optional[bool]
high_res_ocr: Optional[bool]
html_make_all_elements_visible: Optional[bool]
html_remove_fixed_elements: Optional[bool]
html_remove_navigation_elements: Optional[bool]
http_proxy: Optional[str]
ignore_document_elements_for_layout_detection: Optional[bool]
images_to_save: Optional[List[Literal["screenshot", "embedded", "layout"]]]
One of the following:
"screenshot"
"embedded"
"layout"
inline_images_in_markdown: Optional[bool]
input_s3_path: Optional[str]
input_s3_region: Optional[str]

The region for the input S3 bucket.

input_url: Optional[str]
internal_is_screenshot_job: Optional[bool]
invalidate_cache: Optional[bool]
is_formatting_instruction: Optional[bool]
job_timeout_extra_time_per_page_in_seconds: Optional[float]
job_timeout_in_seconds: Optional[float]
keep_page_separator_when_merging_tables: Optional[bool]
lang: Optional[str]

The language.

languages: Optional[List[ParsingLanguages]]
One of the following:
"af"
"az"
"bs"
"cs"
"cy"
"da"
"de"
"en"
"es"
"et"
"fr"
"ga"
"hr"
"hu"
"id"
"is"
"it"
"ku"
"la"
"lt"
"lv"
"mi"
"ms"
"mt"
"nl"
"no"
"oc"
"pi"
"pl"
"pt"
"ro"
"rs_latin"
"sk"
"sl"
"sq"
"sv"
"sw"
"tl"
"tr"
"uz"
"vi"
"ar"
"fa"
"ug"
"ur"
"bn"
"as"
"mni"
"ru"
"rs_cyrillic"
"be"
"bg"
"uk"
"mn"
"abq"
"ady"
"kbd"
"ava"
"dar"
"inh"
"che"
"lbe"
"lez"
"tab"
"tjk"
"hi"
"mr"
"ne"
"bh"
"mai"
"ang"
"bho"
"mah"
"sck"
"new"
"gom"
"sa"
"bgc"
"th"
"ch_sim"
"ch_tra"
"ja"
"ko"
"ta"
"te"
"kn"
layout_aware: Optional[bool]
line_level_bounding_box: Optional[bool]
markdown_table_multiline_header_separator: Optional[str]
max_pages: Optional[int]
max_pages_enforced: Optional[int]
merge_tables_across_pages_in_markdown: Optional[bool]
model: Optional[str]
outlined_table_extraction: Optional[bool]
output_pdf_of_document: Optional[bool]
output_s3_path_prefix: Optional[str]

If specified, llamaParse will save the output to the specified path. All output files will use this prefix, which should be a valid s3:// URL.

output_s3_region: Optional[str]

The region for the output S3 bucket.

output_tables_as_html: Optional[bool]
output_bucket: Optional[str]

The output bucket.

page_error_tolerance: Optional[float]
page_header_prefix: Optional[str]
page_header_suffix: Optional[str]
page_prefix: Optional[str]
page_separator: Optional[str]
page_suffix: Optional[str]
parse_mode: Optional[ParsingMode]

Enum for representing the mode of parsing to be used.

One of the following:
"parse_page_without_llm"
"parse_page_with_llm"
"parse_page_with_lvm"
"parse_page_with_agent"
"parse_page_with_layout_agent"
"parse_document_with_llm"
"parse_document_with_lvm"
"parse_document_with_agent"
parsing_instruction: Optional[str]
pipeline_id: Optional[str]

The pipeline ID.

precise_bounding_box: Optional[bool]
premium_mode: Optional[bool]
presentation_out_of_bounds_content: Optional[bool]
presentation_skip_embedded_data: Optional[bool]
preserve_layout_alignment_across_pages: Optional[bool]
preserve_very_small_text: Optional[bool]
preset: Optional[str]
priority: Optional[Literal["low", "medium", "high", "critical"]]

The priority for the request. This field may be ignored or overwritten depending on the organization tier.

One of the following:
"low"
"medium"
"high"
"critical"
project_id: Optional[str]
remove_hidden_text: Optional[bool]
replace_failed_page_mode: Optional[FailPageMode]

Enum for representing the different available page error handling modes.

One of the following:
"raw_text"
"blank_page"
"error_message"
replace_failed_page_with_error_message_prefix: Optional[str]
replace_failed_page_with_error_message_suffix: Optional[str]
resource_info: Optional[Dict[str, object]]

The resource info about the file

save_images: Optional[bool]
skip_diagonal_text: Optional[bool]
specialized_chart_parsing_agentic: Optional[bool]
specialized_chart_parsing_efficient: Optional[bool]
specialized_chart_parsing_plus: Optional[bool]
specialized_image_parsing: Optional[bool]
spreadsheet_extract_sub_tables: Optional[bool]
spreadsheet_force_formula_computation: Optional[bool]
spreadsheet_include_hidden_sheets: Optional[bool]
strict_mode_buggy_font: Optional[bool]
strict_mode_image_extraction: Optional[bool]
strict_mode_image_ocr: Optional[bool]
strict_mode_reconstruction: Optional[bool]
structured_output: Optional[bool]
structured_output_json_schema: Optional[str]
structured_output_json_schema_name: Optional[str]
system_prompt: Optional[str]
system_prompt_append: Optional[str]
take_screenshot: Optional[bool]
target_pages: Optional[str]
tier: Optional[str]
type: Optional[Literal["parse"]]
use_vendor_multimodal_model: Optional[bool]
user_prompt: Optional[str]
vendor_multimodal_api_key: Optional[str]
vendor_multimodal_model_name: Optional[str]
version: Optional[str]
webhook_configurations: Optional[List[ProcessingResultJobConfigBatchParseJobRecordCreateParametersWebhookConfiguration]]

Outbound webhook endpoints to notify on job status changes

webhook_events: Optional[List[Literal["extract.pending", "extract.success", "extract.error", 14 more]]]

Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered.

One of the following:
"extract.pending"
"extract.success"
"extract.error"
"extract.partial_success"
"extract.cancelled"
"parse.pending"
"parse.running"
"parse.success"
"parse.error"
"parse.partial_success"
"parse.cancelled"
"classify.pending"
"classify.success"
"classify.error"
"classify.partial_success"
"classify.cancelled"
"unmapped_event"
webhook_headers: Optional[Dict[str, str]]

Custom HTTP headers sent with each webhook request (e.g. auth tokens)

webhook_output_format: Optional[str]

Response format sent to the webhook: 'string' (default) or 'json'

webhook_url: Optional[str]

URL to receive webhook POST notifications

webhook_url: Optional[str]
parent_job_execution_id: Optional[str]

The ID of the parent job execution.

formatuuid
partitions: Optional[Dict[str, str]]

The partitions for this execution. Used for determining where to save job output.

project_id: Optional[str]

The ID of the project this job belongs to.

formatuuid
session_id: Optional[str]

The upstream request ID that created this job. Used for tracking the job across services.

formatuuid
user_id: Optional[str]

The ID of the user that created this job

webhook_url: Optional[str]

The URL that needs to be called at the end of the parsing job.

class ClassifyJob:

A classify job.

id: str

Unique identifier

format: uuid
project_id: str

The ID of the project

format: uuid
rules: List[ClassifierRule]

The rules to classify the files

description: str

Natural language description of what to classify. Be specific about the content characteristics that identify this document type.

maxLength: 500
minLength: 10
type: str

The document type to assign when this rule matches (e.g., 'invoice', 'receipt', 'contract')

maxLength: 50
minLength: 1
status: StatusEnum

The status of the classify job

One of the following:
"PENDING"
"SUCCESS"
"ERROR"
"PARTIAL_SUCCESS"
"CANCELLED"
user_id: str

The ID of the user

created_at: Optional[datetime]

Creation datetime

format: date-time
effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

job_record_id: Optional[str]

The job record ID associated with this status, if any.

mode: Optional[Literal["FAST", "MULTIMODAL"]]

The classification mode to use

One of the following:
"FAST"
"MULTIMODAL"
parsing_configuration: Optional[ClassifyParsingConfiguration]

The configuration for the parsing job

lang: Optional[ParsingLanguages]

The language to parse the files in

One of the following:
"af"
"az"
"bs"
"cs"
"cy"
"da"
"de"
"en"
"es"
"et"
"fr"
"ga"
"hr"
"hu"
"id"
"is"
"it"
"ku"
"la"
"lt"
"lv"
"mi"
"ms"
"mt"
"nl"
"no"
"oc"
"pi"
"pl"
"pt"
"ro"
"rs_latin"
"sk"
"sl"
"sq"
"sv"
"sw"
"tl"
"tr"
"uz"
"vi"
"ar"
"fa"
"ug"
"ur"
"bn"
"as"
"mni"
"ru"
"rs_cyrillic"
"be"
"bg"
"uk"
"mn"
"abq"
"ady"
"kbd"
"ava"
"dar"
"inh"
"che"
"lbe"
"lez"
"tab"
"tjk"
"hi"
"mr"
"ne"
"bh"
"mai"
"ang"
"bho"
"mah"
"sck"
"new"
"gom"
"sa"
"bgc"
"th"
"ch_sim"
"ch_tra"
"ja"
"ko"
"ta"
"te"
"kn"
max_pages: Optional[int]

The maximum number of pages to parse

target_pages: Optional[List[int]]

The pages to target for parsing (0-indexed, so first page is at 0)

updated_at: Optional[datetime]

Update datetime

format: date-time
job_type: Literal["parse", "extract", "classify"]

Type of processing performed

One of the following:
"parse"
"extract"
"classify"
output_s3_path: str

Location of the processing output

parameters_hash: str

Content hash of the job configuration for dedup

processed_at: datetime

When this processing occurred

format: date-time
result_id: str

Unique identifier for this result

output_metadata: Optional[object]

Metadata about processing output.

Currently empty - will be populated with job-type-specific metadata fields in the future.

BetaSplit

Create Split Job
beta.split.create(SplitCreateParams**kwargs) -> SplitCreateResponse
POST/api/v1/beta/split/jobs
List Split Jobs
beta.split.list(SplitListParams**kwargs) -> SyncPaginatedCursor[SplitListResponse]
GET/api/v1/beta/split/jobs
Get Split Job
beta.split.get(strsplit_job_id, SplitGetParams**kwargs) -> SplitGetResponse
GET/api/v1/beta/split/jobs/{split_job_id}
ModelsExpand Collapse
class SplitCategory:

Category definition for document splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
class SplitDocumentInput:

Document input specification for beta API.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

class SplitResultResponse:

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

class SplitSegmentResponse:

A segment of the split document.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

class SplitCreateResponse:

Beta response — uses nested document_input object.

id: str

Unique identifier for the split job.

categories: List[SplitCategory]

Categories used for splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
document_input: SplitDocumentInput

Document that was split.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

project_id: str

Project ID this job belongs to.

status: str

Current status of the job. Valid values are: pending, processing, completed, failed, cancelled.

user_id: str

User ID who created this job.

configuration_id: Optional[str]

Split configuration ID used for this job.

created_at: Optional[datetime]

Creation datetime

format: date-time
error_message: Optional[str]

Error message if the job failed.

result: Optional[SplitResultResponse]

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

updated_at: Optional[datetime]

Update datetime

format: date-time
class SplitListResponse:

Beta response — uses nested document_input object.

id: str

Unique identifier for the split job.

categories: List[SplitCategory]

Categories used for splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
document_input: SplitDocumentInput

Document that was split.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

project_id: str

Project ID this job belongs to.

status: str

Current status of the job. Valid values are: pending, processing, completed, failed, cancelled.

user_id: str

User ID who created this job.

configuration_id: Optional[str]

Split configuration ID used for this job.

created_at: Optional[datetime]

Creation datetime

format: date-time
error_message: Optional[str]

Error message if the job failed.

result: Optional[SplitResultResponse]

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

updated_at: Optional[datetime]

Update datetime

format: date-time
class SplitGetResponse:

Beta response — uses nested document_input object.

id: str

Unique identifier for the split job.

categories: List[SplitCategory]

Categories used for splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
document_input: SplitDocumentInput

Document that was split.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

project_id: str

Project ID this job belongs to.

status: str

Current status of the job. Valid values are: pending, processing, completed, failed, cancelled.

user_id: str

User ID who created this job.

configuration_id: Optional[str]

Split configuration ID used for this job.

created_at: Optional[datetime]

Creation datetime

format: date-time
error_message: Optional[str]

Error message if the job failed.

result: Optional[SplitResultResponse]

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

updated_at: Optional[datetime]

Update datetime

format: date-time