vllm.entrypoints.pooling.base.protocol

CompletionRequestMixin

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/base/protocol.py
class CompletionRequestMixin(OpenAIBaseModel):
    input: list[int] | list[list[int]] | str | list[str]

    add_special_tokens: bool = Field(
        default=True,
        description=(
            "If true (the default), special tokens (e.g. BOS) will be added to "
            "the prompt."
        ),
    )

add_special_tokens class-attribute instance-attribute

add_special_tokens: bool = Field(
    default=True,
    description="If true (the default), special tokens (e.g. BOS) will be added to the prompt.",
)

input instance-attribute

input: list[int] | list[list[int]] | str | list[str]
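
A minimal, self-contained sketch of how the input union validates in practice. It re-declares the mixin against plain pydantic.BaseModel as a stand-in for vLLM's OpenAIBaseModel (an assumption made purely so the snippet runs outside vLLM); all four accepted shapes of input go through the same field.

from pydantic import BaseModel, Field


# Stand-in re-declaration; the real class derives from OpenAIBaseModel.
class CompletionRequestMixin(BaseModel):
    input: list[int] | list[list[int]] | str | list[str]
    add_special_tokens: bool = Field(
        default=True,
        description=(
            "If true (the default), special tokens (e.g. BOS) will be added to "
            "the prompt."
        ),
    )


# One string, a batch of strings, one token-id prompt, a batch of token-id prompts:
CompletionRequestMixin(input="Hello, world")
CompletionRequestMixin(input=["Hello", "world"])
CompletionRequestMixin(input=[1, 2, 3])
CompletionRequestMixin(input=[[1, 2], [3, 4]], add_special_tokens=False)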

PoolingBasicRequestMixin

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/base/protocol.py
class PoolingBasicRequestMixin(OpenAIBaseModel):
    model: str | None = None
    user: str | None = None
    truncate_prompt_tokens: Annotated[int, Field(ge=-1)] | None = None

    request_id: str = Field(
        default_factory=random_uuid,
        description=(
            "The request_id related to this request. If the caller does "
            "not set it, a random_uuid will be generated. This id is used "
            "throughout the inference process and returned in the response."
        ),
    )

    priority: int = Field(
        default=0,
        description=(
            "The priority of the request (lower means earlier handling; "
            "default: 0). Any priority other than 0 will raise an error "
            "if the served model does not use priority scheduling."
        ),
    )

model class-attribute instance-attribute

model: str | None = None

priority class-attribute instance-attribute

priority: int = Field(
    default=0,
    description="The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.",
)

request_id class-attribute instance-attribute

request_id: str = Field(
    default_factory=random_uuid,
    description="The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.",
)

truncate_prompt_tokens class-attribute instance-attribute

truncate_prompt_tokens: (
    Annotated[int, Field(ge=-1)] | None
) = None

user class-attribute instance-attribute

user: str | None = None
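
A hedged sketch of how a concrete pooling request might compose the two mixins. PoolingRequest is a hypothetical name, plain pydantic.BaseModel stands in for OpenAIBaseModel, and uuid4().hex stands in for vLLM's random_uuid helper; the defaults shown (priority 0, no truncation, auto-generated request_id) follow the field definitions above.

from typing import Annotated
from uuid import uuid4

from pydantic import BaseModel, Field


def random_uuid() -> str:
    # Stand-in for vLLM's random_uuid helper.
    return uuid4().hex


class CompletionRequestMixin(BaseModel):
    input: list[int] | list[list[int]] | str | list[str]
    add_special_tokens: bool = True


class PoolingBasicRequestMixin(BaseModel):
    model: str | None = None
    user: str | None = None
    # ge=-1: in vLLM, -1 requests truncation to the model's maximum length.
    truncate_prompt_tokens: Annotated[int, Field(ge=-1)] | None = None
    request_id: str = Field(default_factory=random_uuid)
    priority: int = 0


class PoolingRequest(PoolingBasicRequestMixin, CompletionRequestMixin):
    """Hypothetical concrete request combining both mixins."""


req = PoolingRequest(input="embed me", model="my-embedding-model")
assert req.priority == 0                   # default: normal priority
assert req.truncate_prompt_tokens is None  # default: no truncation
print(req.request_id)                      # auto-generated when the caller omits it

Because both mixins are plain Pydantic models, composition is ordinary multiple inheritance; an endpoint-specific request class can add its own fields on top of the shared ones shown here.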