Typecast

TypecastToolSpec #

Bases: BaseToolSpec

Typecast tool spec for text-to-speech synthesis with emotion control.

Source code in .build/python/llama-index-integrations/tools/llama-index-tools-typecast/llama_index/tools/typecast/base.py

class TypecastToolSpec(BaseToolSpec):
    """Tool spec exposing Typecast voice lookup and emotion-aware text-to-speech."""

    spec_functions = ["get_voices", "get_voice", "text_to_speech"]

    def __init__(self, api_key: str, host: Optional[str] = None) -> None:
        """
        Remember the connection settings; a client is built from them per call.

        Args:
            api_key (str): Typecast API key
            host (Optional[str]): Base URL of the Typecast API. The value is
                handed to the client unchanged (documented default:
                https://api.typecast.ai)

        """
        self.api_key, self.host = api_key, host

    def get_voices(
        self,
        model: Optional[str] = None,
        gender: Optional[str] = None,
        age: Optional[str] = None,
        use_case: Optional[str] = None,
    ) -> List[dict]:
        """
        List the voices Typecast offers (V2 API), optionally narrowed by filters.

        A filter object is only sent when at least one argument is truthy;
        otherwise the unfiltered voice list is requested.

        Args:
            model (Optional[str]): Model name to filter on (e.g., "ssfm-v21", "ssfm-v30")
            gender (Optional[str]): Gender to filter on ("male" or "female")
            age (Optional[str]): Age group to filter on ("child", "teenager", "young_adult", "middle_age", "elder")
            use_case (Optional[str]): Use case category to filter on (e.g., "Audiobook", "Game", "Podcast")

        Returns:
            List[dict]: One dict per voice, containing:
                - voice_id: Unique voice identifier
                - voice_name: Human-readable name
                - models: List of supported models with emotions
                - gender: Voice gender (optional)
                - age: Voice age group (optional)
                - use_cases: List of suitable use cases (optional)

        Raises:
            TypecastError: If API request fails

        """
        typecast_client = Typecast(host=self.host, api_key=self.api_key)

        # Only build a filter when the caller actually asked for one.
        wants_filter = bool(model or gender or age or use_case)
        voice_filter = (
            VoicesV2Filter(
                model=model,
                gender=gender,
                age=age,
                use_cases=use_case,
            )
            if wants_filter
            else None
        )

        voices = typecast_client.voices_v2(filter=voice_filter)

        # Convert each voice model into a plain dict for the caller.
        dumped = []
        for entry in voices:
            dumped.append(entry.model_dump())
        return dumped

    def get_voice(self, voice_id: str) -> dict:
        """
        Look up a single Typecast voice by its ID (V2 API).

        Args:
            voice_id (str): ID of the voice to fetch (e.g., "tc_62a8975e695ad26f7fb514d1")

        Returns:
            dict: The voice's details, containing:
                - voice_id: Unique voice identifier
                - voice_name: Human-readable name
                - models: List of supported models with emotions
                - gender: Voice gender (optional)
                - age: Voice age group (optional)
                - use_cases: List of suitable use cases (optional)

        Raises:
            NotFoundError: If voice not found
            TypecastError: If API request fails

        """
        typecast_client = Typecast(host=self.host, api_key=self.api_key)
        # Fetch the voice model and hand it back as a plain dict.
        return typecast_client.voice_v2(voice_id).model_dump()

    def text_to_speech(
        self,
        text: str,
        voice_id: str,
        output_path: str,
        model: str = "ssfm-v21",
        language: Optional[str] = None,
        emotion_preset: Optional[str] = "normal",
        emotion_intensity: Optional[float] = 1.0,
        volume: Optional[int] = 100,
        audio_pitch: Optional[int] = 0,
        audio_tempo: Optional[float] = 1.0,
        audio_format: Optional[str] = "wav",
        seed: Optional[int] = None,
    ) -> str:
        """
        Synthesize speech for the given text with Typecast and save it to disk.

        Args:
            text (str): Text to speak; must contain non-whitespace characters
            voice_id (str): ID of the voice to use (e.g., "tc_62a8975e695ad26f7fb514d1")
            output_path (str): Where the audio file is written
            model (str): Voice model name (default: "ssfm-v21")
            language (Optional[str]): Language code (ISO 639-3, e.g., "eng", "kor")
            emotion_preset (Optional[str]): Emotion preset (normal, happy, sad, angry)
            emotion_intensity (Optional[float]): Emotion intensity (0.0 to 2.0)
            volume (Optional[int]): Volume (0 to 200, default: 100)
            audio_pitch (Optional[int]): Audio pitch (-12 to 12, default: 0)
            audio_tempo (Optional[float]): Audio tempo (0.5 to 2.0, default: 1.0)
            audio_format (Optional[str]): Audio format (wav or mp3, default: wav)
            seed (Optional[int]): Random seed for reproducible results

        Returns:
            str: The same output_path, once the audio has been written there

        Raises:
            ValueError: If text is empty/blank, or voice_id or output_path is empty
            BadRequestError: If request parameters are invalid
            UnauthorizedError: If API authentication fails
            PaymentRequiredError: If API quota exceeded
            NotFoundError: If resource not found
            UnprocessableEntityError: If validation error
            InternalServerError: If Typecast API server error
            TypecastError: If other API error occurs
            IOError: If file save fails

        """
        # Reject unusable arguments before any client or request is created.
        if not (text and text.strip()):
            raise ValueError("Text cannot be empty")
        if not voice_id:
            raise ValueError("Voice ID is required")
        if not output_path:
            raise ValueError("Output path is required")

        typecast_client = Typecast(host=self.host, api_key=self.api_key)

        # Assemble the request from its emotion and audio-output parts.
        emotion_settings = Prompt(
            emotion_preset=emotion_preset,
            emotion_intensity=emotion_intensity,
        )
        audio_settings = Output(
            volume=volume,
            audio_pitch=audio_pitch,
            audio_tempo=audio_tempo,
            audio_format=audio_format,
        )
        tts_request = TTSRequest(
            voice_id=voice_id,
            text=text,
            model=model,
            language=language,
            prompt=emotion_settings,
            output=audio_settings,
            seed=seed,
        )

        synthesized = typecast_client.text_to_speech(tts_request)

        # Write the raw audio bytes to the requested location.
        with open(output_path, "wb") as audio_file:
            audio_file.write(synthesized.audio_data)

        return output_path

get_voices #

get_voices(
    model: Optional[str] = None,
    gender: Optional[str] = None,
    age: Optional[str] = None,
    use_case: Optional[str] = None,
) -> List[dict]

Get list of available voices from Typecast (V2 API).

Parameters:

Name	Type	Description	Default
`model`	`Optional[str]`	Filter by model name (e.g., "ssfm-v21", "ssfm-v30")	`None`
`gender`	`Optional[str]`	Filter by gender ("male" or "female")	`None`
`age`	`Optional[str]`	Filter by age group ("child", "teenager", "young_adult", "middle_age", "elder")	`None`
`use_case`	`Optional[str]`	Filter by use case category (e.g., "Audiobook", "Game", "Podcast")	`None`

Returns:

Type	Description
`List[dict]`	List of available voices, each with these details: voice_id (unique voice identifier); voice_name (human-readable name); models (list of supported models with emotions); gender (voice gender, optional); age (voice age group, optional); use_cases (list of suitable use cases, optional)

Raises:

Type	Description
`TypecastError`	If API request fails

Source code in .build/python/llama-index-integrations/tools/llama-index-tools-typecast/llama_index/tools/typecast/base.py

def get_voices(
    self,
    model: Optional[str] = None,
    gender: Optional[str] = None,
    age: Optional[str] = None,
    use_case: Optional[str] = None,
) -> List[dict]:
    """
    List the voices Typecast offers (V2 API), optionally narrowed by filters.

    A filter object is only sent when at least one argument is truthy;
    otherwise the unfiltered voice list is requested.

    Args:
        model (Optional[str]): Model name to filter on (e.g., "ssfm-v21", "ssfm-v30")
        gender (Optional[str]): Gender to filter on ("male" or "female")
        age (Optional[str]): Age group to filter on ("child", "teenager", "young_adult", "middle_age", "elder")
        use_case (Optional[str]): Use case category to filter on (e.g., "Audiobook", "Game", "Podcast")

    Returns:
        List[dict]: One dict per voice, containing:
            - voice_id: Unique voice identifier
            - voice_name: Human-readable name
            - models: List of supported models with emotions
            - gender: Voice gender (optional)
            - age: Voice age group (optional)
            - use_cases: List of suitable use cases (optional)

    Raises:
        TypecastError: If API request fails

    """
    typecast_client = Typecast(host=self.host, api_key=self.api_key)

    # Only build a filter when the caller actually asked for one.
    wants_filter = bool(model or gender or age or use_case)
    voice_filter = (
        VoicesV2Filter(
            model=model,
            gender=gender,
            age=age,
            use_cases=use_case,
        )
        if wants_filter
        else None
    )

    voices = typecast_client.voices_v2(filter=voice_filter)

    # Convert each voice model into a plain dict for the caller.
    dumped = []
    for entry in voices:
        dumped.append(entry.model_dump())
    return dumped

get_voice #

get_voice(voice_id: str) -> dict

Get details of a specific voice from Typecast (V2 API).

Parameters:

Name	Type	Description	Default
`voice_id`	`str`	The voice ID to get details for (e.g., "tc_62a8975e695ad26f7fb514d1")	required

Returns:

Name	Type	Description
`dict`	`dict`	Voice details, including: voice_id (unique voice identifier); voice_name (human-readable name); models (list of supported models with emotions); gender (voice gender, optional); age (voice age group, optional); use_cases (list of suitable use cases, optional)

Raises:

Type	Description
`NotFoundError`	If voice not found
`TypecastError`	If API request fails

Source code in .build/python/llama-index-integrations/tools/llama-index-tools-typecast/llama_index/tools/typecast/base.py

def get_voice(self, voice_id: str) -> dict:
    """
    Look up a single Typecast voice by its ID (V2 API).

    Args:
        voice_id (str): ID of the voice to fetch (e.g., "tc_62a8975e695ad26f7fb514d1")

    Returns:
        dict: The voice's details, containing:
            - voice_id: Unique voice identifier
            - voice_name: Human-readable name
            - models: List of supported models with emotions
            - gender: Voice gender (optional)
            - age: Voice age group (optional)
            - use_cases: List of suitable use cases (optional)

    Raises:
        NotFoundError: If voice not found
        TypecastError: If API request fails

    """
    typecast_client = Typecast(host=self.host, api_key=self.api_key)
    # Fetch the voice model and hand it back as a plain dict.
    return typecast_client.voice_v2(voice_id).model_dump()

text_to_speech #

text_to_speech(
    text: str,
    voice_id: str,
    output_path: str,
    model: str = "ssfm-v21",
    language: Optional[str] = None,
    emotion_preset: Optional[str] = "normal",
    emotion_intensity: Optional[float] = 1.0,
    volume: Optional[int] = 100,
    audio_pitch: Optional[int] = 0,
    audio_tempo: Optional[float] = 1.0,
    audio_format: Optional[str] = "wav",
    seed: Optional[int] = None,
) -> str

Convert text to speech using Typecast API.

Parameters:

Name	Type	Description	Default
`text`	`str`	The text to convert to speech	required
`voice_id`	`str`	The voice ID to use (e.g., "tc_62a8975e695ad26f7fb514d1")	required
`output_path`	`str`	Path to save the audio file	required
`model`	`str`	Voice model name (default: "ssfm-v21")	`'ssfm-v21'`
`language`	`Optional[str]`	Language code (ISO 639-3, e.g., "eng", "kor")	`None`
`emotion_preset`	`Optional[str]`	Emotion preset (normal, happy, sad, angry)	`'normal'`
`emotion_intensity`	`Optional[float]`	Emotion intensity (0.0 to 2.0)	`1.0`
`volume`	`Optional[int]`	Volume (0 to 200, default: 100)	`100`
`audio_pitch`	`Optional[int]`	Audio pitch (-12 to 12, default: 0)	`0`
`audio_tempo`	`Optional[float]`	Audio tempo (0.5 to 2.0, default: 1.0)	`1.0`
`audio_format`	`Optional[str]`	Audio format (wav or mp3, default: wav)	`'wav'`
`seed`	`Optional[int]`	Random seed for reproducible results	`None`

Returns:

Name	Type	Description
`str`	`str`	Path to the generated audio file

Raises:

Type	Description
`ValueError`	If parameters are invalid
`BadRequestError`	If request parameters are invalid
`UnauthorizedError`	If API authentication fails
`PaymentRequiredError`	If API quota exceeded
`NotFoundError`	If resource not found
`UnprocessableEntityError`	If validation error
`InternalServerError`	If Typecast API server error
`TypecastError`	If other API error occurs
`IOError`	If file save fails

Source code in .build/python/llama-index-integrations/tools/llama-index-tools-typecast/llama_index/tools/typecast/base.py

def text_to_speech(
    self,
    text: str,
    voice_id: str,
    output_path: str,
    model: str = "ssfm-v21",
    language: Optional[str] = None,
    emotion_preset: Optional[str] = "normal",
    emotion_intensity: Optional[float] = 1.0,
    volume: Optional[int] = 100,
    audio_pitch: Optional[int] = 0,
    audio_tempo: Optional[float] = 1.0,
    audio_format: Optional[str] = "wav",
    seed: Optional[int] = None,
) -> str:
    """
    Synthesize speech for the given text with Typecast and save it to disk.

    Args:
        text (str): Text to speak; must contain non-whitespace characters
        voice_id (str): ID of the voice to use (e.g., "tc_62a8975e695ad26f7fb514d1")
        output_path (str): Where the audio file is written
        model (str): Voice model name (default: "ssfm-v21")
        language (Optional[str]): Language code (ISO 639-3, e.g., "eng", "kor")
        emotion_preset (Optional[str]): Emotion preset (normal, happy, sad, angry)
        emotion_intensity (Optional[float]): Emotion intensity (0.0 to 2.0)
        volume (Optional[int]): Volume (0 to 200, default: 100)
        audio_pitch (Optional[int]): Audio pitch (-12 to 12, default: 0)
        audio_tempo (Optional[float]): Audio tempo (0.5 to 2.0, default: 1.0)
        audio_format (Optional[str]): Audio format (wav or mp3, default: wav)
        seed (Optional[int]): Random seed for reproducible results

    Returns:
        str: The same output_path, once the audio has been written there

    Raises:
        ValueError: If text is empty/blank, or voice_id or output_path is empty
        BadRequestError: If request parameters are invalid
        UnauthorizedError: If API authentication fails
        PaymentRequiredError: If API quota exceeded
        NotFoundError: If resource not found
        UnprocessableEntityError: If validation error
        InternalServerError: If Typecast API server error
        TypecastError: If other API error occurs
        IOError: If file save fails

    """
    # Reject unusable arguments before any client or request is created.
    if not (text and text.strip()):
        raise ValueError("Text cannot be empty")
    if not voice_id:
        raise ValueError("Voice ID is required")
    if not output_path:
        raise ValueError("Output path is required")

    typecast_client = Typecast(host=self.host, api_key=self.api_key)

    # Assemble the request from its emotion and audio-output parts.
    emotion_settings = Prompt(
        emotion_preset=emotion_preset,
        emotion_intensity=emotion_intensity,
    )
    audio_settings = Output(
        volume=volume,
        audio_pitch=audio_pitch,
        audio_tempo=audio_tempo,
        audio_format=audio_format,
    )
    tts_request = TTSRequest(
        voice_id=voice_id,
        text=text,
        model=model,
        language=language,
        prompt=emotion_settings,
        output=audio_settings,
        seed=seed,
    )

    synthesized = typecast_client.text_to_speech(tts_request)

    # Write the raw audio bytes to the requested location.
    with open(output_path, "wb") as audio_file:
        audio_file.write(synthesized.audio_data)

    return output_path

options: members: - TypecastToolSpec