Skip to content

Beta

Beta > Agent Data

Get Agent Data
beta.agent_data.get(str item_id, AgentDataGetParams **kwargs) -> AgentData
GET /api/v1/beta/agent-data/{item_id}
Update Agent Data
beta.agent_data.update(str item_id, AgentDataUpdateParams **kwargs) -> AgentData
PUT /api/v1/beta/agent-data/{item_id}
Delete Agent Data
beta.agent_data.delete(str item_id, AgentDataDeleteParams **kwargs) -> AgentDataDeleteResponse
DELETE /api/v1/beta/agent-data/{item_id}
Create Agent Data
beta.agent_data.create(AgentDataCreateParams **kwargs) -> AgentData
POST /api/v1/beta/agent-data
Search Agent Data
beta.agent_data.search(AgentDataSearchParams **kwargs) -> SyncPaginatedCursorPost[AgentData]
POST /api/v1/beta/agent-data/:search
Aggregate Agent Data
beta.agent_data.aggregate(AgentDataAggregateParams **kwargs) -> SyncPaginatedCursorPost[AgentDataAggregateResponse]
POST /api/v1/beta/agent-data/:aggregate
Delete Agent Data By Query
beta.agent_data.delete_by_query(AgentDataDeleteByQueryParams **kwargs) -> AgentDataDeleteByQueryResponse
POST /api/v1/beta/agent-data/:delete
Models
class AgentData:

API Result for a single agent data item

data: Dict[str, object]
deployment_name: str
id: Optional[str]
collection: Optional[str]
created_at: Optional[datetime]
project_id: Optional[str]
updated_at: Optional[datetime]
Dict[str, str]
class AgentDataAggregateResponse:

API Result for a single group in the aggregate response

group_key: Dict[str, object]
count: Optional[int]
first_item: Optional[Dict[str, object]]
class AgentDataDeleteByQueryResponse:

API response for bulk delete operation

deleted_count: int

Beta > Sheets

Create Spreadsheet Job
beta.sheets.create(SheetCreateParams **kwargs) -> SheetsJob
POST /api/v1/beta/sheets/jobs
List Spreadsheet Jobs
beta.sheets.list(SheetListParams **kwargs) -> SyncPaginatedCursor[SheetsJob]
GET /api/v1/beta/sheets/jobs
Get Spreadsheet Job
beta.sheets.get(str spreadsheet_job_id, SheetGetParams **kwargs) -> SheetsJob
GET /api/v1/beta/sheets/jobs/{spreadsheet_job_id}
Get Result Region
beta.sheets.get_result_table(Literal["table", "extra", "cell_metadata"] region_type, SheetGetResultTableParams **kwargs) -> PresignedURL
GET /api/v1/beta/sheets/jobs/{spreadsheet_job_id}/regions/{region_id}/result/{region_type}
Delete Spreadsheet Job
beta.sheets.delete_job(str spreadsheet_job_id, SheetDeleteJobParams **kwargs) -> object
DELETE /api/v1/beta/sheets/jobs/{spreadsheet_job_id}
Models
class SheetsJob:

A spreadsheet parsing job

id: str

The ID of the job

Configuration for the parsing job

extraction_range: Optional[str]

A1 notation of the range to extract a single region from. If None, the entire sheet is used.

flatten_hierarchical_tables: Optional[bool]

Return a flattened dataframe when a detected table is recognized as hierarchical.

generate_additional_metadata: Optional[bool]

Whether to generate additional metadata (title, description) for each extracted region.

include_hidden_cells: Optional[bool]

Whether to include hidden cells when extracting regions from the spreadsheet.

sheet_names: Optional[List[str]]

The names of the sheets to extract regions from. If empty, all sheets will be processed.

specialization: Optional[str]

Optional specialization mode for domain-specific extraction. Supported values: 'financial-standard', 'financial-enhanced', 'financial-precise'. Default None uses the general-purpose pipeline.

table_merge_sensitivity: Optional[Literal["strong", "weak"]]

Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).

One of the following:
"strong"
"weak"
use_experimental_processing: Optional[bool]

Enables experimental processing. Accuracy may be impacted.

created_at: str

When the job was created

file_id: Optional[str]

The ID of the input file

formatuuid
project_id: str

The ID of the project

formatuuid
status: StatusEnum

The status of the parsing job

One of the following:
"PENDING"
"SUCCESS"
"ERROR"
"PARTIAL_SUCCESS"
"CANCELLED"
updated_at: str

When the job was last updated

user_id: str

The ID of the user

errors: Optional[List[str]]

Any errors encountered

Deprecated — file: Optional[File]

Schema for a file.

id: str

Unique identifier

formatuuid
name: str
project_id: str

The ID of the project that the file belongs to

formatuuid
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

The ID of the data source that the file belongs to

formatuuid
expires_at: Optional[datetime]

The expiration date for the file. Files past this date can be deleted.

formatdate-time
external_file_id: Optional[str]

The ID of the file in the external system

file_size: Optional[int]

Size of the file in bytes

minimum0
file_type: Optional[str]

File type (e.g. pdf, docx, etc.)

maxLength3000
minLength1
last_modified_at: Optional[datetime]

The last modified time of the file

formatdate-time
permission_info: Optional[Dict[str, Union[Dict[str, object], List[object], str, 3 more]]]

Permission information for the file

One of the following:
Dict[str, object]
List[object]
str
float
bool
purpose: Optional[str]

The intended purpose of the file (e.g., 'user_data', 'parse', 'extract', 'split', 'classify')

resource_info: Optional[Dict[str, Union[Dict[str, object], List[object], str, 3 more]]]

Resource information for the file

One of the following:
Dict[str, object]
List[object]
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
regions: Optional[List[Region]]

All extracted regions (populated when job is complete)

location: str

Location of the region in the spreadsheet

region_type: str

Type of the extracted region

sheet_name: str

Worksheet name where region was found

description: Optional[str]

Generated description for the region

region_id: Optional[str]

Unique identifier for this region within the file

title: Optional[str]

Generated title for the region

success: Optional[bool]

Whether the job completed successfully

worksheet_metadata: Optional[List[WorksheetMetadata]]

Metadata for each processed worksheet (populated when job is complete)

sheet_name: str

Name of the worksheet

description: Optional[str]

Generated description of the worksheet

title: Optional[str]

Generated title for the worksheet

class SheetsParsingConfig:

Configuration for spreadsheet parsing and region extraction

extraction_range: Optional[str]

A1 notation of the range to extract a single region from. If None, the entire sheet is used.

flatten_hierarchical_tables: Optional[bool]

Return a flattened dataframe when a detected table is recognized as hierarchical.

generate_additional_metadata: Optional[bool]

Whether to generate additional metadata (title, description) for each extracted region.

include_hidden_cells: Optional[bool]

Whether to include hidden cells when extracting regions from the spreadsheet.

sheet_names: Optional[List[str]]

The names of the sheets to extract regions from. If empty, all sheets will be processed.

specialization: Optional[str]

Optional specialization mode for domain-specific extraction. Supported values: 'financial-standard', 'financial-enhanced', 'financial-precise'. Default None uses the general-purpose pipeline.

table_merge_sensitivity: Optional[Literal["strong", "weak"]]

Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).

One of the following:
"strong"
"weak"
use_experimental_processing: Optional[bool]

Enables experimental processing. Accuracy may be impacted.

Beta > Directories

Create Directory
beta.directories.create(DirectoryCreateParams **kwargs) -> DirectoryCreateResponse
POST /api/v1/beta/directories
List Directories
beta.directories.list(DirectoryListParams **kwargs) -> SyncPaginatedCursor[DirectoryListResponse]
GET /api/v1/beta/directories
Get Directory
beta.directories.get(str directory_id, DirectoryGetParams **kwargs) -> DirectoryGetResponse
GET /api/v1/beta/directories/{directory_id}
Update Directory
beta.directories.update(str directory_id, DirectoryUpdateParams **kwargs) -> DirectoryUpdateResponse
PATCH/api/v1/beta/directories/{directory_id}
Delete Directory
beta.directories.delete(str directory_id, DirectoryDeleteParams **kwargs)
DELETE /api/v1/beta/directories/{directory_id}
Models
class DirectoryCreateResponse:

API response schema for a directory.

id: str

Unique identifier for the directory.

name: str

Human-readable name for the directory.

minLength1
project_id: str

Project the directory belongs to.

created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source id the directory syncs from. Null if just manual uploads.

deleted_at: Optional[datetime]

Optional timestamp of when the directory was deleted. Null if not deleted.

formatdate-time
description: Optional[str]

Optional description shown to users.

updated_at: Optional[datetime]

Update datetime

formatdate-time
class DirectoryListResponse:

API response schema for a directory.

id: str

Unique identifier for the directory.

name: str

Human-readable name for the directory.

minLength1
project_id: str

Project the directory belongs to.

created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source id the directory syncs from. Null if just manual uploads.

deleted_at: Optional[datetime]

Optional timestamp of when the directory was deleted. Null if not deleted.

formatdate-time
description: Optional[str]

Optional description shown to users.

updated_at: Optional[datetime]

Update datetime

formatdate-time
class DirectoryGetResponse:

API response schema for a directory.

id: str

Unique identifier for the directory.

name: str

Human-readable name for the directory.

minLength1
project_id: str

Project the directory belongs to.

created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source id the directory syncs from. Null if just manual uploads.

deleted_at: Optional[datetime]

Optional timestamp of when the directory was deleted. Null if not deleted.

formatdate-time
description: Optional[str]

Optional description shown to users.

updated_at: Optional[datetime]

Update datetime

formatdate-time
class DirectoryUpdateResponse:

API response schema for a directory.

id: str

Unique identifier for the directory.

name: str

Human-readable name for the directory.

minLength1
project_id: str

Project the directory belongs to.

created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source id the directory syncs from. Null if just manual uploads.

deleted_at: Optional[datetime]

Optional timestamp of when the directory was deleted. Null if not deleted.

formatdate-time
description: Optional[str]

Optional description shown to users.

updated_at: Optional[datetime]

Update datetime

formatdate-time

Beta > Directories > Files

Add Directory File
beta.directories.files.add(str directory_id, FileAddParams **kwargs) -> FileAddResponse
POST /api/v1/beta/directories/{directory_id}/files
List Directory Files
beta.directories.files.list(str directory_id, FileListParams **kwargs) -> SyncPaginatedCursor[FileListResponse]
GET /api/v1/beta/directories/{directory_id}/files
Get Directory File
beta.directories.files.get(str directory_file_id, FileGetParams **kwargs) -> FileGetResponse
GET /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Update Directory File
beta.directories.files.update(str directory_file_id, FileUpdateParams **kwargs) -> FileUpdateResponse
PATCH /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Delete Directory File
beta.directories.files.delete(str directory_file_id, FileDeleteParams **kwargs)
DELETE /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Upload File To Directory
beta.directories.files.upload(str directory_id, FileUploadParams **kwargs) -> FileUploadResponse
POST /api/v1/beta/directories/{directory_id}/files/upload
Models
class FileAddResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
class FileListResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
class FileGetResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
class FileUpdateResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time
class FileUploadResponse:

API response schema for a directory file.

id: str

Unique identifier for the directory file.

directory_id: str

Directory the file belongs to.

display_name: str

Display name for the file.

minLength1
project_id: str

Project the directory file belongs to.

unique_id: str

Unique identifier for the file in the directory

minLength1
created_at: Optional[datetime]

Creation datetime

formatdate-time
data_source_id: Optional[str]

Optional data source credential associated with the file.

deleted_at: Optional[datetime]

Soft delete marker when the file is removed upstream or by user action.

formatdate-time
file_id: Optional[str]

File ID for the storage location.

metadata: Optional[Dict[str, Union[str, float, bool, null]]]

Merged metadata from all sources. Higher-priority sources override lower.

One of the following:
str
float
bool
updated_at: Optional[datetime]

Update datetime

formatdate-time

Beta > Batch

Create Batch Job
beta.batch.create(BatchCreateParams **kwargs) -> BatchCreateResponse
POST /api/v1/beta/batch-processing
List Batch Jobs
beta.batch.list(BatchListParams **kwargs) -> SyncPaginatedBatchItems[BatchListResponse]
GET /api/v1/beta/batch-processing
Get Batch Job Status
beta.batch.get_status(str job_id, BatchGetStatusParams **kwargs) -> BatchGetStatusResponse
GET /api/v1/beta/batch-processing/{job_id}
Cancel Batch Job
beta.batch.cancel(str job_id, BatchCancelParams **kwargs) -> BatchCancelResponse
POST /api/v1/beta/batch-processing/{job_id}/cancel
Models
class BatchCreateResponse:

Response schema for a batch processing job.

id: str

Unique identifier for the batch job

job_type: Literal["parse", "extract", "classify"]

Type of processing operation (parse, extract, or classify)

One of the following:
"parse"
"extract"
"classify"
project_id: str

Project this job belongs to

status: Literal["pending", "running", "dispatched", 3 more]

Current job status

One of the following:
"pending"
"running"
"dispatched"
"completed"
"failed"
"cancelled"
total_items: int

Total number of items in the job

completed_at: Optional[datetime]

Timestamp when job completed

formatdate-time
created_at: Optional[datetime]

Creation datetime

formatdate-time
directory_id: Optional[str]

Directory being processed

effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

failed_items: Optional[int]

Number of items that failed processing

job_record_id: Optional[str]

The job record ID associated with this status, if any.

processed_items: Optional[int]

Number of items processed so far

skipped_items: Optional[int]

Number of items skipped (already processed or size limit)

started_at: Optional[datetime]

Timestamp when job processing started

formatdate-time
updated_at: Optional[datetime]

Update datetime

formatdate-time
workflow_id: Optional[str]

Async job tracking ID

class BatchListResponse:

Response schema for a batch processing job.

id: str

Unique identifier for the batch job

job_type: Literal["parse", "extract", "classify"]

Type of processing operation (parse, extract, or classify)

One of the following:
"parse"
"extract"
"classify"
project_id: str

Project this job belongs to

status: Literal["pending", "running", "dispatched", 3 more]

Current job status

One of the following:
"pending"
"running"
"dispatched"
"completed"
"failed"
"cancelled"
total_items: int

Total number of items in the job

completed_at: Optional[datetime]

Timestamp when job completed

formatdate-time
created_at: Optional[datetime]

Creation datetime

formatdate-time
directory_id: Optional[str]

Directory being processed

effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

failed_items: Optional[int]

Number of items that failed processing

job_record_id: Optional[str]

The job record ID associated with this status, if any.

processed_items: Optional[int]

Number of items processed so far

skipped_items: Optional[int]

Number of items skipped (already processed or size limit)

started_at: Optional[datetime]

Timestamp when job processing started

formatdate-time
updated_at: Optional[datetime]

Update datetime

formatdate-time
workflow_id: Optional[str]

Async job tracking ID

class BatchGetStatusResponse:

Detailed status response for a batch processing job.

job: Job

Response schema for a batch processing job.

id: str

Unique identifier for the batch job

job_type: Literal["parse", "extract", "classify"]

Type of processing operation (parse, extract, or classify)

One of the following:
"parse"
"extract"
"classify"
project_id: str

Project this job belongs to

status: Literal["pending", "running", "dispatched", 3 more]

Current job status

One of the following:
"pending"
"running"
"dispatched"
"completed"
"failed"
"cancelled"
total_items: int

Total number of items in the job

completed_at: Optional[datetime]

Timestamp when job completed

formatdate-time
created_at: Optional[datetime]

Creation datetime

formatdate-time
directory_id: Optional[str]

Directory being processed

effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

failed_items: Optional[int]

Number of items that failed processing

job_record_id: Optional[str]

The job record ID associated with this status, if any.

processed_items: Optional[int]

Number of items processed so far

skipped_items: Optional[int]

Number of items skipped (already processed or size limit)

started_at: Optional[datetime]

Timestamp when job processing started

formatdate-time
updated_at: Optional[datetime]

Update datetime

formatdate-time
workflow_id: Optional[str]

Async job tracking ID

progress_percentage: float

Percentage of items processed (0-100)

maximum100
minimum0
class BatchCancelResponse:

Response after cancelling a batch job.

job_id: str

ID of the cancelled job

message: str

Confirmation message

processed_items: int

Number of items processed before cancellation

status: Literal["pending", "running", "dispatched", 3 more]

New status (should be 'cancelled')

One of the following:
"pending"
"running"
"dispatched"
"completed"
"failed"
"cancelled"

Beta > Batch > Job Items

List Batch Job Items
beta.batch.job_items.list(str job_id, JobItemListParams **kwargs) -> SyncPaginatedBatchItems[JobItemListResponse]
GET /api/v1/beta/batch-processing/{job_id}/items
Get Item Processing Results
beta.batch.job_items.get_processing_results(str item_id, JobItemGetProcessingResultsParams **kwargs) -> JobItemGetProcessingResultsResponse
GET /api/v1/beta/batch-processing/items/{item_id}/processing-results
Models
class JobItemListResponse:

Detailed information about an item in a batch job.

item_id: str

ID of the item

item_name: str

Name of the item

status: Literal["pending", "processing", "completed", 3 more]

Processing status of this item

One of the following:
"pending"
"processing"
"completed"
"failed"
"skipped"
"cancelled"
completed_at: Optional[datetime]

When processing completed for this item

formatdate-time
effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

job_id: Optional[str]

Job ID for the underlying processing job (links to parse/extract job results)

job_record_id: Optional[str]

The job record ID associated with this status, if any.

skip_reason: Optional[str]

Reason item was skipped (e.g., 'already_processed', 'size_limit_exceeded')

started_at: Optional[datetime]

When processing started for this item

formatdate-time
class JobItemGetProcessingResultsResponse:

Response containing all processing results for an item.

item_id: str

ID of the source item

item_name: str

Name of the source item

processing_results: Optional[List[ProcessingResult]]

List of all processing operations performed on this item

item_id: str

Source item that was processed

job_config: ProcessingResultJobConfig

Job configuration used for processing

One of the following:
class ProcessingResultJobConfigBatchParseJobRecordCreate:

Batch-specific parse job record for batch processing.

This model contains the metadata and configuration for a batch parse job, but excludes file-specific information. It's used as input to the batch parent workflow and combined with DirectoryFile data to create full ParseJobRecordCreate instances for each file.

Attributes:
  job_name: Must be PARSE_RAW_FILE
  partitions: Partitions for job output location
  parameters: Generic parse configuration (BatchParseJobConfig)
  session_id: Upstream request ID for tracking
  correlation_id: Correlation ID for cross-service tracking
  parent_job_execution_id: Parent job execution ID if nested
  user_id: User who created the job
  project_id: Project this job belongs to
  webhook_url: Optional webhook URL for job completion notifications

correlation_id: Optional[str]

The correlation ID for this job. Used for tracking the job across services.

formatuuid
job_name: Optional[Literal["parse_raw_file_job"]]
parameters: Optional[ProcessingResultJobConfigBatchParseJobRecordCreateParameters]

Generic parse job configuration for batch processing.

This model contains the parsing configuration that applies to all files in a batch, but excludes file-specific fields like file_name, file_id, etc. Those file-specific fields are populated from DirectoryFile data when creating individual ParseJobRecordCreate instances for each file.

The fields in this model should be generic settings that apply uniformly to all files being processed in the batch.

adaptive_long_table: Optional[bool]
aggressive_table_extraction: Optional[bool]
auto_mode: Optional[bool]
auto_mode_configuration_json: Optional[str]
auto_mode_trigger_on_image_in_page: Optional[bool]
auto_mode_trigger_on_regexp_in_page: Optional[str]
auto_mode_trigger_on_table_in_page: Optional[bool]
auto_mode_trigger_on_text_in_page: Optional[str]
azure_openai_api_version: Optional[str]
azure_openai_deployment_name: Optional[str]
azure_openai_endpoint: Optional[str]
azure_openai_key: Optional[str]
bbox_bottom: Optional[float]
bbox_left: Optional[float]
bbox_right: Optional[float]
bbox_top: Optional[float]
bounding_box: Optional[str]
compact_markdown_table: Optional[bool]
complemental_formatting_instruction: Optional[str]
content_guideline_instruction: Optional[str]
continuous_mode: Optional[bool]
custom_metadata: Optional[Dict[str, object]]

The custom metadata to attach to the documents.

disable_image_extraction: Optional[bool]
disable_ocr: Optional[bool]
disable_reconstruction: Optional[bool]
do_not_cache: Optional[bool]
do_not_unroll_columns: Optional[bool]
enable_cost_optimizer: Optional[bool]
extract_charts: Optional[bool]
extract_layout: Optional[bool]
extract_printed_page_number: Optional[bool]
fast_mode: Optional[bool]
formatting_instruction: Optional[str]
gpt4o_api_key: Optional[str]
gpt4o_mode: Optional[bool]
guess_xlsx_sheet_name: Optional[bool]
hide_footers: Optional[bool]
hide_headers: Optional[bool]
high_res_ocr: Optional[bool]
html_make_all_elements_visible: Optional[bool]
html_remove_fixed_elements: Optional[bool]
html_remove_navigation_elements: Optional[bool]
http_proxy: Optional[str]
ignore_document_elements_for_layout_detection: Optional[bool]
images_to_save: Optional[List[Literal["screenshot", "embedded", "layout"]]]
One of the following:
"screenshot"
"embedded"
"layout"
inline_images_in_markdown: Optional[bool]
input_s3_path: Optional[str]
input_s3_region: Optional[str]

The region for the input S3 bucket.

input_url: Optional[str]
internal_is_screenshot_job: Optional[bool]
invalidate_cache: Optional[bool]
is_formatting_instruction: Optional[bool]
job_timeout_extra_time_per_page_in_seconds: Optional[float]
job_timeout_in_seconds: Optional[float]
keep_page_separator_when_merging_tables: Optional[bool]
lang: Optional[str]

The language.

languages: Optional[List[ParsingLanguages]]
One of the following:
"af"
"az"
"bs"
"cs"
"cy"
"da"
"de"
"en"
"es"
"et"
"fr"
"ga"
"hr"
"hu"
"id"
"is"
"it"
"ku"
"la"
"lt"
"lv"
"mi"
"ms"
"mt"
"nl"
"no"
"oc"
"pi"
"pl"
"pt"
"ro"
"rs_latin"
"sk"
"sl"
"sq"
"sv"
"sw"
"tl"
"tr"
"uz"
"vi"
"ar"
"fa"
"ug"
"ur"
"bn"
"as"
"mni"
"ru"
"rs_cyrillic"
"be"
"bg"
"uk"
"mn"
"abq"
"ady"
"kbd"
"ava"
"dar"
"inh"
"che"
"lbe"
"lez"
"tab"
"tjk"
"hi"
"mr"
"ne"
"bh"
"mai"
"ang"
"bho"
"mah"
"sck"
"new"
"gom"
"sa"
"bgc"
"th"
"ch_sim"
"ch_tra"
"ja"
"ko"
"ta"
"te"
"kn"
layout_aware: Optional[bool]
line_level_bounding_box: Optional[bool]
markdown_table_multiline_header_separator: Optional[str]
max_pages: Optional[int]
max_pages_enforced: Optional[int]
merge_tables_across_pages_in_markdown: Optional[bool]
model: Optional[str]
outlined_table_extraction: Optional[bool]
output_pdf_of_document: Optional[bool]
output_s3_path_prefix: Optional[str]

If specified, llamaParse will save the output to the specified path. All output files will use this prefix, which should be a valid s3:// URL.

output_s3_region: Optional[str]

The region for the output S3 bucket.

output_tables_as_html: Optional[bool]
output_bucket: Optional[str]

The output bucket.

page_error_tolerance: Optional[float]
page_header_prefix: Optional[str]
page_header_suffix: Optional[str]
page_prefix: Optional[str]
page_separator: Optional[str]
page_suffix: Optional[str]
parse_mode: Optional[ParsingMode]

Enum for representing the mode of parsing to be used.

One of the following:
"parse_page_without_llm"
"parse_page_with_llm"
"parse_page_with_lvm"
"parse_page_with_agent"
"parse_page_with_layout_agent"
"parse_document_with_llm"
"parse_document_with_lvm"
"parse_document_with_agent"
parsing_instruction: Optional[str]
pipeline_id: Optional[str]

The pipeline ID.

precise_bounding_box: Optional[bool]
premium_mode: Optional[bool]
presentation_out_of_bounds_content: Optional[bool]
presentation_skip_embedded_data: Optional[bool]
preserve_layout_alignment_across_pages: Optional[bool]
preserve_very_small_text: Optional[bool]
preset: Optional[str]
priority: Optional[Literal["low", "medium", "high", "critical"]]

The priority for the request. This field may be ignored or overwritten depending on the organization tier.

One of the following:
"low"
"medium"
"high"
"critical"
project_id: Optional[str]
remove_hidden_text: Optional[bool]
replace_failed_page_mode: Optional[FailPageMode]

Enum for representing the different available page error handling modes.

One of the following:
"raw_text"
"blank_page"
"error_message"
replace_failed_page_with_error_message_prefix: Optional[str]
replace_failed_page_with_error_message_suffix: Optional[str]
resource_info: Optional[Dict[str, object]]

The resource info about the file

save_images: Optional[bool]
skip_diagonal_text: Optional[bool]
specialized_chart_parsing_agentic: Optional[bool]
specialized_chart_parsing_efficient: Optional[bool]
specialized_chart_parsing_plus: Optional[bool]
specialized_image_parsing: Optional[bool]
spreadsheet_extract_sub_tables: Optional[bool]
spreadsheet_force_formula_computation: Optional[bool]
spreadsheet_include_hidden_sheets: Optional[bool]
strict_mode_buggy_font: Optional[bool]
strict_mode_image_extraction: Optional[bool]
strict_mode_image_ocr: Optional[bool]
strict_mode_reconstruction: Optional[bool]
structured_output: Optional[bool]
structured_output_json_schema: Optional[str]
structured_output_json_schema_name: Optional[str]
system_prompt: Optional[str]
system_prompt_append: Optional[str]
take_screenshot: Optional[bool]
target_pages: Optional[str]
tier: Optional[str]
type: Optional[Literal["parse"]]
use_vendor_multimodal_model: Optional[bool]
user_prompt: Optional[str]
vendor_multimodal_api_key: Optional[str]
vendor_multimodal_model_name: Optional[str]
version: Optional[str]
webhook_configurations: Optional[List[ProcessingResultJobConfigBatchParseJobRecordCreateParametersWebhookConfiguration]]

Outbound webhook endpoints to notify on job status changes

webhook_events: Optional[List[Literal["extract.pending", "extract.success", "extract.error", 14 more]]]

Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered.

One of the following:
"extract.pending"
"extract.success"
"extract.error"
"extract.partial_success"
"extract.cancelled"
"parse.pending"
"parse.running"
"parse.success"
"parse.error"
"parse.partial_success"
"parse.cancelled"
"classify.pending"
"classify.success"
"classify.error"
"classify.partial_success"
"classify.cancelled"
"unmapped_event"
webhook_headers: Optional[Dict[str, str]]

Custom HTTP headers sent with each webhook request (e.g. auth tokens)

webhook_output_format: Optional[str]

Response format sent to the webhook: 'string' (default) or 'json'

webhook_url: Optional[str]

URL to receive webhook POST notifications

webhook_url: Optional[str]
parent_job_execution_id: Optional[str]

The ID of the parent job execution.

formatuuid
partitions: Optional[Dict[str, str]]

The partitions for this execution. Used for determining where to save job output.

project_id: Optional[str]

The ID of the project this job belongs to.

formatuuid
session_id: Optional[str]

The upstream request ID that created this job. Used for tracking the job across services.

formatuuid
user_id: Optional[str]

The ID of the user that created this job

webhook_url: Optional[str]

The URL that needs to be called at the end of the parsing job.

class ClassifyJob:

A classify job.

id: str

Unique identifier

format: uuid
project_id: str

The ID of the project

format: uuid
rules: List[ClassifierRule]

The rules to classify the files

description: str

Natural language description of what to classify. Be specific about the content characteristics that identify this document type.

maxLength: 500
minLength: 10
type: str

The document type to assign when this rule matches (e.g., 'invoice', 'receipt', 'contract')

maxLength: 50
minLength: 1
status: StatusEnum

The status of the classify job

One of the following:
"PENDING"
"SUCCESS"
"ERROR"
"PARTIAL_SUCCESS"
"CANCELLED"
user_id: str

The ID of the user

created_at: Optional[datetime]

Creation datetime

format: date-time
effective_at: Optional[datetime]
error_message: Optional[str]

Error message for the latest job attempt, if any.

job_record_id: Optional[str]

The job record ID associated with this status, if any.

mode: Optional[Literal["FAST", "MULTIMODAL"]]

The classification mode to use

One of the following:
"FAST"
"MULTIMODAL"
parsing_configuration: Optional[ClassifyParsingConfiguration]

The configuration for the parsing job

lang: Optional[ParsingLanguages]

The language to parse the files in

One of the following:
"af"
"az"
"bs"
"cs"
"cy"
"da"
"de"
"en"
"es"
"et"
"fr"
"ga"
"hr"
"hu"
"id"
"is"
"it"
"ku"
"la"
"lt"
"lv"
"mi"
"ms"
"mt"
"nl"
"no"
"oc"
"pi"
"pl"
"pt"
"ro"
"rs_latin"
"sk"
"sl"
"sq"
"sv"
"sw"
"tl"
"tr"
"uz"
"vi"
"ar"
"fa"
"ug"
"ur"
"bn"
"as"
"mni"
"ru"
"rs_cyrillic"
"be"
"bg"
"uk"
"mn"
"abq"
"ady"
"kbd"
"ava"
"dar"
"inh"
"che"
"lbe"
"lez"
"tab"
"tjk"
"hi"
"mr"
"ne"
"bh"
"mai"
"ang"
"bho"
"mah"
"sck"
"new"
"gom"
"sa"
"bgc"
"th"
"ch_sim"
"ch_tra"
"ja"
"ko"
"ta"
"te"
"kn"
max_pages: Optional[int]

The maximum number of pages to parse

target_pages: Optional[List[int]]

The pages to target for parsing (0-indexed, so first page is at 0)

updated_at: Optional[datetime]

Update datetime

format: date-time
job_type: Literal["parse", "extract", "classify"]

Type of processing performed

One of the following:
"parse"
"extract"
"classify"
output_s3_path: str

Location of the processing output

parameters_hash: str

Content hash of the job configuration for dedup

processed_at: datetime

When this processing occurred

format: date-time
result_id: str

Unique identifier for this result

output_metadata: Optional[object]

Metadata about processing output.

Currently empty - will be populated with job-type-specific metadata fields in the future.

BetaSplit

Create Split Job
beta.split.create(SplitCreateParams**kwargs) -> SplitCreateResponse
POST/api/v1/beta/split/jobs
List Split Jobs
beta.split.list(SplitListParams**kwargs) -> SyncPaginatedCursor[SplitListResponse]
GET/api/v1/beta/split/jobs
Get Split Job
beta.split.get(strsplit_job_id, SplitGetParams**kwargs) -> SplitGetResponse
GET/api/v1/beta/split/jobs/{split_job_id}
ModelsExpand Collapse
class SplitCategory:

Category definition for document splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
class SplitDocumentInput:

Document input specification for beta API.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

class SplitResultResponse:

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

class SplitSegmentResponse:

A segment of the split document.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

class SplitCreateResponse:

Beta response — uses nested document_input object.

id: str

Unique identifier for the split job.

categories: List[SplitCategory]

Categories used for splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
document_input: SplitDocumentInput

Document that was split.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

project_id: str

Project ID this job belongs to.

status: str

Current status of the job. Valid values are: pending, processing, completed, failed, cancelled.

user_id: str

User ID who created this job.

configuration_id: Optional[str]

Split configuration ID used for this job.

created_at: Optional[datetime]

Creation datetime

format: date-time
error_message: Optional[str]

Error message if the job failed.

result: Optional[SplitResultResponse]

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

updated_at: Optional[datetime]

Update datetime

format: date-time
class SplitListResponse:

Beta response — uses nested document_input object.

id: str

Unique identifier for the split job.

categories: List[SplitCategory]

Categories used for splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
document_input: SplitDocumentInput

Document that was split.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

project_id: str

Project ID this job belongs to.

status: str

Current status of the job. Valid values are: pending, processing, completed, failed, cancelled.

user_id: str

User ID who created this job.

configuration_id: Optional[str]

Split configuration ID used for this job.

created_at: Optional[datetime]

Creation datetime

format: date-time
error_message: Optional[str]

Error message if the job failed.

result: Optional[SplitResultResponse]

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

updated_at: Optional[datetime]

Update datetime

format: date-time
class SplitGetResponse:

Beta response — uses nested document_input object.

id: str

Unique identifier for the split job.

categories: List[SplitCategory]

Categories used for splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
document_input: SplitDocumentInput

Document that was split.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

project_id: str

Project ID this job belongs to.

status: str

Current status of the job. Valid values are: pending, processing, completed, failed, cancelled.

user_id: str

User ID who created this job.

configuration_id: Optional[str]

Split configuration ID used for this job.

created_at: Optional[datetime]

Creation datetime

format: date-time
error_message: Optional[str]

Error message if the job failed.

result: Optional[SplitResultResponse]

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

updated_at: Optional[datetime]

Update datetime

format: date-time