Skip to content

SentenceSplitter

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:20

Parse text with a preference for complete sentences.

SentenceSplitter<Options>(nodes, options?): TextNode<Metadata>[]

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:20

Parse text with a preference for complete sentences.

Options extends Record<string, unknown>

BaseNode<Metadata>[]

Options

TextNode<Metadata>[]

new SentenceSplitter(params?): SentenceSplitter

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:53

PartialWithUndefined<{ chunkSize: number; chunkOverlap: number; separator: string; paragraphSeparator: string; secondaryChunkingRegex: string; extraAbbreviations: string[]; }> & SplitterParams & object

SentenceSplitter

MetadataAwareTextSplitter.constructor

includeMetadata: boolean = true

Defined in: .build/typescript/packages/core/src/node-parser/base.ts:17

MetadataAwareTextSplitter.includeMetadata


includePrevNextRel: boolean = true

Defined in: .build/typescript/packages/core/src/node-parser/base.ts:18

MetadataAwareTextSplitter.includePrevNextRel


chunkSize: number = 1024

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:24

The size of each chunk, measured in tokens.


chunkOverlap: number = 200

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:28

The number of tokens shared between consecutive chunks when splitting.


separator: string = " "

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:32

Default separator for splitting into words.


paragraphSeparator: string = "\n\n\n"

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:36

Separator between paragraphs.


secondaryChunkingRegex: string = "[^,.;。?!]+[,.;。?!]?"

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:40

Backup regex for splitting into sentences.


extraAbbreviations: undefined | string[] = []

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:45

Extra abbreviations to consider while splitting into sentences. For example, for contracts, you may want to treat “LLC.” as an important abbreviation.


id: string

Defined in: .build/typescript/packages/core/src/schema/type.ts:22

MetadataAwareTextSplitter.id

protected postProcessParsedNodes(nodes, parentDocMap): TextNode<Metadata>[]

Defined in: .build/typescript/packages/core/src/node-parser/base.ts:27

TextNode<Metadata>[]

Map<string, TextNode<Metadata>>

TextNode<Metadata>[]

MetadataAwareTextSplitter.postProcessParsedNodes


getNodesFromDocuments(documents): TextNode<Metadata>[]

Defined in: .build/typescript/packages/core/src/node-parser/base.ts:84

TextNode<Metadata>[]

TextNode<Metadata>[]

MetadataAwareTextSplitter.getNodesFromDocuments


splitTexts(texts): string[]

Defined in: .build/typescript/packages/core/src/node-parser/base.ts:126

string[]

string[]

MetadataAwareTextSplitter.splitTexts


splitTextsMetadataAware(texts, metadata): string[]

Defined in: .build/typescript/packages/core/src/node-parser/base.ts:142

string[]

string[]

string[]

MetadataAwareTextSplitter.splitTextsMetadataAware


protected getMetadataString(node): string

Defined in: .build/typescript/packages/core/src/node-parser/base.ts:151

TextNode

string

MetadataAwareTextSplitter.getMetadataString


protected parseNodes(nodes): TextNode<Metadata>[]

Defined in: .build/typescript/packages/core/src/node-parser/base.ts:161

TextNode<Metadata>[]

TextNode<Metadata>[]

MetadataAwareTextSplitter.parseNodes


splitTextMetadataAware(text, metadata): string[]

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:80

string

string

string[]

MetadataAwareTextSplitter.splitTextMetadataAware


splitText(text): string[]

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:95

string

string[]

MetadataAwareTextSplitter.splitText


_splitText(text, chunkSize): string[]

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:99

string

number

string[]


tokenSize(text): number

Defined in: .build/typescript/packages/core/src/node-parser/sentence-splitter.ts:228

string

number