Struct google_api_proto::google::cloud::aiplatform::v1beta1::prediction_service_client::PredictionServiceClient
pub struct PredictionServiceClient<T> { /* private fields */ }
A service for online predictions and explanations.
Implementations
impl<T> PredictionServiceClient<T>
where
    T: GrpcService<BoxBody>,
    T::Error: Into<StdError>,
    T::ResponseBody: Body<Data = Bytes> + Send + 'static,
    <T::ResponseBody as Body>::Error: Into<StdError> + Send,
pub fn new(inner: T) -> Self
pub fn with_origin(inner: T, origin: Uri) -> Self
pub fn with_interceptor<F>(
    inner: T,
    interceptor: F,
) -> PredictionServiceClient<InterceptedService<T, F>>
where
    F: Interceptor,
    T::ResponseBody: Default,
    T: Service<Request<BoxBody>, Response = Response<<T as GrpcService<BoxBody>>::ResponseBody>>,
    <T as Service<Request<BoxBody>>>::Error: Into<StdError> + Send + Sync,
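A minimal construction sketch, assuming tonic is built with its transport and TLS features and that an OAuth2 access token has been obtained out of band; the regional endpoint URI and the closure-based interceptor are illustrative:

```rust
use google_api_proto::google::cloud::aiplatform::v1beta1::prediction_service_client::PredictionServiceClient;
use tonic::{metadata::MetadataValue, transport::Channel, Request};

async fn build_client(access_token: &str) -> Result<(), Box<dyn std::error::Error>> {
    // Regional Vertex AI endpoint (illustrative; use the region hosting your resources).
    let channel = Channel::from_static("https://us-central1-aiplatform.googleapis.com")
        .connect()
        .await?;

    // Attach `authorization: Bearer <token>` to every outgoing request.
    // Obtaining and refreshing the token is out of scope here.
    let bearer: MetadataValue<_> = format!("Bearer {access_token}").parse()?;
    let _client =
        PredictionServiceClient::with_interceptor(channel, move |mut req: Request<()>| {
            req.metadata_mut().insert("authorization", bearer.clone());
            Ok(req)
        });
    Ok(())
}
```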
pub fn send_compressed(self, encoding: CompressionEncoding) -> Self
Compress requests with the given encoding.
This requires the server to support it; otherwise it might respond with an error.
pub fn accept_compressed(self, encoding: CompressionEncoding) -> Self
Enable decompressing responses.
pub fn max_decoding_message_size(self, limit: usize) -> Self
Limits the maximum size of a decoded message.
Default: 4MB
pub fn max_encoding_message_size(self, limit: usize) -> Self
Limits the maximum size of an encoded message.
Default: usize::MAX
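These four methods consume and return `Self`, so they chain builder-style. A sketch, assuming tonic is built with its `gzip` feature:

```rust
use google_api_proto::google::cloud::aiplatform::v1beta1::prediction_service_client::PredictionServiceClient;
use tonic::{codec::CompressionEncoding, transport::Channel};

fn configure(channel: Channel) -> PredictionServiceClient<Channel> {
    PredictionServiceClient::new(channel)
        .send_compressed(CompressionEncoding::Gzip)
        .accept_compressed(CompressionEncoding::Gzip)
        // Large prediction payloads can exceed the 4 MB decoding default.
        .max_decoding_message_size(16 * 1024 * 1024)
        .max_encoding_message_size(16 * 1024 * 1024)
}
```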
pub async fn predict(
    &mut self,
    request: impl IntoRequest<PredictRequest>,
) -> Result<Response<PredictResponse>, Status>
Perform an online prediction.
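A sketch of a unary call, assuming the `prost-types` crate for the JSON-like `Value` instances; the endpoint resource name is a placeholder:

```rust
use google_api_proto::google::cloud::aiplatform::v1beta1::{
    prediction_service_client::PredictionServiceClient, PredictRequest,
};
use tonic::transport::Channel;

async fn predict_one(
    client: &mut PredictionServiceClient<Channel>,
    instance: prost_types::Value,
) -> Result<(), tonic::Status> {
    let request = PredictRequest {
        // Placeholder: projects/{project}/locations/{location}/endpoints/{endpoint}
        endpoint: "projects/my-project/locations/us-central1/endpoints/1234".into(),
        instances: vec![instance],
        ..Default::default()
    };
    let response = client.predict(request).await?;
    println!("got {} prediction(s)", response.get_ref().predictions.len());
    Ok(())
}
```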
pub async fn raw_predict(
    &mut self,
    request: impl IntoRequest<RawPredictRequest>,
) -> Result<Response<HttpBody>, Status>
Perform an online prediction with an arbitrary HTTP payload.
The response includes the following HTTP headers:

- `X-Vertex-AI-Endpoint-Id`: ID of the [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] that served this prediction.
- `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint’s [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] that served this prediction.
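Both IDs arrive as gRPC response metadata, and tonic exposes header names lowercased. A sketch of reading them alongside the raw body:

```rust
use google_api_proto::google::cloud::aiplatform::v1beta1::{
    prediction_service_client::PredictionServiceClient, RawPredictRequest,
};
use tonic::transport::Channel;

async fn raw_predict_with_headers(
    client: &mut PredictionServiceClient<Channel>,
    request: RawPredictRequest,
) -> Result<(), tonic::Status> {
    let response = client.raw_predict(request).await?;
    if let Some(id) = response.metadata().get("x-vertex-ai-endpoint-id") {
        println!("served by endpoint {id:?}");
    }
    if let Some(id) = response.metadata().get("x-vertex-ai-deployed-model-id") {
        println!("served by deployed model {id:?}");
    }
    // The HttpBody pairs a content type with the raw payload bytes.
    let body = response.get_ref();
    println!("{} bytes of {}", body.data.len(), body.content_type);
    Ok(())
}
```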
pub async fn stream_raw_predict(
    &mut self,
    request: impl IntoRequest<StreamRawPredictRequest>,
) -> Result<Response<Streaming<HttpBody>>, Status>
Perform a streaming online prediction with an arbitrary HTTP payload.
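Server-streaming methods return a `Streaming<...>` body; `message()` yields items until the server closes the stream. A sketch:

```rust
use google_api_proto::google::cloud::aiplatform::v1beta1::{
    prediction_service_client::PredictionServiceClient, StreamRawPredictRequest,
};
use tonic::transport::Channel;

async fn drain_raw_stream(
    client: &mut PredictionServiceClient<Channel>,
    request: StreamRawPredictRequest,
) -> Result<(), tonic::Status> {
    let mut stream = client.stream_raw_predict(request).await?.into_inner();
    // `message()` resolves to Ok(None) once the stream is exhausted.
    while let Some(chunk) = stream.message().await? {
        println!("chunk: {} bytes", chunk.data.len());
    }
    Ok(())
}
```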
pub async fn direct_predict(
    &mut self,
    request: impl IntoRequest<DirectPredictRequest>,
) -> Result<Response<DirectPredictResponse>, Status>
Perform a unary online prediction request to a gRPC model server for Vertex first-party products and frameworks.
pub async fn direct_raw_predict(
    &mut self,
    request: impl IntoRequest<DirectRawPredictRequest>,
) -> Result<Response<DirectRawPredictResponse>, Status>
Perform a unary online prediction request to a gRPC model server for custom containers.
pub async fn stream_direct_predict(
    &mut self,
    request: impl IntoStreamingRequest<Message = StreamDirectPredictRequest>,
) -> Result<Response<Streaming<StreamDirectPredictResponse>>, Status>
Perform a streaming online prediction request to a gRPC model server for Vertex first-party products and frameworks.
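The streaming-request methods accept any `Stream` of the message type via `IntoStreamingRequest`; a fixed in-memory stream (built here with the `tokio-stream` crate, an assumption about the caller's stack) is enough for a sketch:

```rust
use google_api_proto::google::cloud::aiplatform::v1beta1::{
    prediction_service_client::PredictionServiceClient, StreamDirectPredictRequest,
};
use tonic::transport::Channel;

async fn stream_direct(
    client: &mut PredictionServiceClient<Channel>,
) -> Result<(), tonic::Status> {
    // Two default-initialized requests just to exercise the wire shape;
    // real callers would feed inputs through an mpsc-backed stream.
    let outbound = tokio_stream::iter(vec![
        StreamDirectPredictRequest::default(),
        StreamDirectPredictRequest::default(),
    ]);
    let mut inbound = client.stream_direct_predict(outbound).await?.into_inner();
    while let Some(_response) = inbound.message().await? {
        // handle each StreamDirectPredictResponse here
    }
    Ok(())
}
```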
pub async fn stream_direct_raw_predict(
    &mut self,
    request: impl IntoStreamingRequest<Message = StreamDirectRawPredictRequest>,
) -> Result<Response<Streaming<StreamDirectRawPredictResponse>>, Status>
Perform a streaming online prediction request to a gRPC model server for custom containers.
pub async fn streaming_predict(
    &mut self,
    request: impl IntoStreamingRequest<Message = StreamingPredictRequest>,
) -> Result<Response<Streaming<StreamingPredictResponse>>, Status>
Perform a streaming online prediction request for Vertex first-party products and frameworks.
pub async fn server_streaming_predict(
    &mut self,
    request: impl IntoRequest<StreamingPredictRequest>,
) -> Result<Response<Streaming<StreamingPredictResponse>>, Status>
Perform a server-side streaming online prediction request for Vertex LLM streaming.
pub async fn streaming_raw_predict(
    &mut self,
    request: impl IntoStreamingRequest<Message = StreamingRawPredictRequest>,
) -> Result<Response<Streaming<StreamingRawPredictResponse>>, Status>
Perform a streaming online prediction request through gRPC.
pub async fn explain(
    &mut self,
    request: impl IntoRequest<ExplainRequest>,
) -> Result<Response<ExplainResponse>, Status>
Perform an online explanation.
If [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id] is specified, the corresponding DeployedModel must have [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec] populated. If [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id] is not specified, all DeployedModels must have [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec] populated.
pub async fn count_tokens(
    &mut self,
    request: impl IntoRequest<CountTokensRequest>,
) -> Result<Response<CountTokensResponse>, Status>
Perform token counting for the given input.
pub async fn generate_content(
    &mut self,
    request: impl IntoRequest<GenerateContentRequest>,
) -> Result<Response<GenerateContentResponse>, Status>
Generate content with multimodal inputs.
pub async fn stream_generate_content(
    &mut self,
    request: impl IntoRequest<GenerateContentRequest>,
) -> Result<Response<Streaming<GenerateContentResponse>>, Status>
Generate content with multimodal inputs, with streaming support.
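A sketch of streaming generation; the `Content`/`Part` shapes follow the generated prost messages, and the publisher-model resource name is a placeholder:

```rust
use google_api_proto::google::cloud::aiplatform::v1beta1::{
    part, prediction_service_client::PredictionServiceClient, Content, GenerateContentRequest,
    Part,
};
use tonic::transport::Channel;

async fn generate_streaming(
    client: &mut PredictionServiceClient<Channel>,
) -> Result<(), tonic::Status> {
    let request = GenerateContentRequest {
        // Placeholder publisher-model resource name.
        model: "projects/my-project/locations/us-central1/publishers/google/models/gemini-pro"
            .into(),
        contents: vec![Content {
            role: "user".into(),
            parts: vec![Part {
                data: Some(part::Data::Text("Summarize this API in one line.".into())),
                ..Default::default()
            }],
        }],
        ..Default::default()
    };
    let mut stream = client.stream_generate_content(request).await?.into_inner();
    while let Some(_chunk) = stream.message().await? {
        // Each chunk carries incremental candidates; concatenate their text
        // parts to reassemble the full reply.
    }
    Ok(())
}
```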
pub async fn chat_completions(
    &mut self,
    request: impl IntoRequest<ChatCompletionsRequest>,
) -> Result<Response<Streaming<HttpBody>>, Status>
Exposes an OpenAI-compatible endpoint for chat completions.
Trait Implementations
impl<T: Clone> Clone for PredictionServiceClient<T>
fn clone(&self) -> PredictionServiceClient<T>
fn clone_from(&mut self, source: &Self)
Auto Trait Implementations
impl<T> !Freeze for PredictionServiceClient<T>
impl<T> RefUnwindSafe for PredictionServiceClient<T> where T: RefUnwindSafe
impl<T> Send for PredictionServiceClient<T> where T: Send
impl<T> Sync for PredictionServiceClient<T> where T: Sync
impl<T> Unpin for PredictionServiceClient<T> where T: Unpin
impl<T> UnwindSafe for PredictionServiceClient<T> where T: UnwindSafe
Blanket Implementations
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoRequest<T> for T
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request.