Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,10 @@ message WorkerPoolSpec {
// Optional. List of NFS mount spec.
repeated NfsMount nfs_mounts = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. List of Lustre mounts.
repeated LustreMount lustre_mounts = 9
[(google.api.field_behavior) = OPTIONAL];

// Disk spec.
DiskSpec disk_spec = 5;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,35 +85,31 @@ message MachineSpec {
];
}

// A description of resources that are dedicated to a DeployedModel, and
// that need a higher degree of manual configuration.
// A description of resources that are dedicated to a DeployedModel or
// DeployedIndex, and that need a higher degree of manual configuration.
message DedicatedResources {
// Required. Immutable. The specification of a single machine used by the
// prediction.
// Required. Immutable. The specification of a single machine being used.
MachineSpec machine_spec = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.field_behavior) = IMMUTABLE
];

// Required. Immutable. The minimum number of machine replicas this
// DeployedModel will be always deployed on. This value must be greater than
// or equal to 1.
// Required. Immutable. The minimum number of machine replicas that will be
// always deployed on. This value must be greater than or equal to 1.
//
// If traffic against the DeployedModel increases, it may dynamically be
// deployed onto more replicas, and as traffic decreases, some of these extra
// replicas may be freed.
// If traffic increases, it may dynamically be deployed onto more replicas,
// and as traffic decreases, some of these extra replicas may be freed.
int32 min_replica_count = 2 [
(google.api.field_behavior) = REQUIRED,
(google.api.field_behavior) = IMMUTABLE
];

// Immutable. The maximum number of replicas this DeployedModel may be
// deployed on when the traffic against it increases. If the requested value
// is too large, the deployment will error, but if deployment succeeds then
// the ability to scale the model to that many replicas is guaranteed (barring
// service outages). If traffic against the DeployedModel increases beyond
// what its replicas at maximum may handle, a portion of the traffic will be
// dropped. If this value is not provided, will use
// Immutable. The maximum number of replicas that may be deployed on when the
// traffic against it increases. If the requested value is too large, the
// deployment will error, but if deployment succeeds then the ability to scale
// to that many replicas is guaranteed (barring service outages). If traffic
// increases beyond what its replicas at maximum may handle, a portion of the
// traffic will be dropped. If this value is not provided, will use
// [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
// as the default value.
//
Expand All @@ -124,8 +120,8 @@ message DedicatedResources {
int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];

// Optional. Number of required available replicas for the deployment to
// succeed. This field is only needed when partial model deployment/mutation
// is desired. If set, the model deploy/mutate operation will succeed once
// succeed. This field is only needed when partial deployment/mutation is
// desired. If set, the deploy/mutate operation will succeed once
// available_replica_count reaches required_replica_count, and the rest of
// the replicas will be retried. If not set, the default
// required_replica_count will be min_replica_count.
Expand Down Expand Up @@ -166,23 +162,22 @@ message DedicatedResources {
// and require only a modest additional configuration.
// Each Model supporting these resources documents its specific guidelines.
message AutomaticResources {
// Immutable. The minimum number of replicas this DeployedModel will always be
// deployed on. If traffic against it increases, it may dynamically be
// deployed onto more replicas up to
// Immutable. The minimum number of replicas that will always be deployed on.
// If traffic against it increases, it may dynamically be deployed onto more
// replicas up to
// [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
// and as traffic decreases, some of these extra replicas may be freed. If the
// requested value is too large, the deployment will error.
int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

// Immutable. The maximum number of replicas this DeployedModel may be
// deployed on when the traffic against it increases. If the requested value
// is too large, the deployment will error, but if deployment succeeds then
// the ability to scale the model to that many replicas is guaranteed (barring
// service outages). If traffic against the DeployedModel increases beyond
// what its replicas at maximum may handle, a portion of the traffic will be
// dropped. If this value is not provided, no upper bound for scaling under
// heavy traffic will be assumed, though Vertex AI may be unable to scale
// beyond a certain replica number.
// Immutable. The maximum number of replicas that may be deployed on when the
// traffic against it increases. If the requested value is too large, the
// deployment will error, but if deployment succeeds then the ability to scale
// to that many replicas is guaranteed (barring service outages). If traffic
// increases beyond what its replicas at maximum may handle, a portion of the
// traffic will be dropped. If this value is not provided, no upper bound
// for scaling under heavy traffic will be assumed, though Vertex AI may be
// unable to scale beyond a certain replica number.
int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
}

Expand Down Expand Up @@ -215,9 +210,10 @@ message ResourcesConsumed {

// Represents the spec of disk options.
message DiskSpec {
// Type of the boot disk (default is "pd-ssd").
// Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
// "pd-standard" (Persistent Disk Hard Disk Drive).
// Type of the boot disk. For non-A3U machines, the default value is
// "pd-ssd", for A3U machines, the default value is "hyperdisk-balanced".
// Valid values: "pd-ssd" (Persistent Disk Solid State Drive),
// "pd-standard" (Persistent Disk Hard Disk Drive) or "hyperdisk-balanced".
string boot_disk_type = 1;

// Size in GB of the boot disk (default is 100GB).
Expand Down Expand Up @@ -253,6 +249,22 @@ message NfsMount {
string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
}

// Mount configuration for a Lustre file system.
message LustreMount {
  // Required. The IP address of the Lustre instance.
  string instance_ip = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The unique identifier of the Lustre volume.
  string volume_handle = 2 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The name of the Lustre filesystem.
  string filesystem = 3 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The destination mount path. The Lustre file system will be
  // mounted for the user under `/mnt/lustre/<mount_point>`.
  string mount_point = 4 [
    (google.api.field_behavior) = REQUIRED
  ];
}

// The metric specification that defines the target resource utilization
// (CPU utilization, accelerator's duty cycle, and so on) for calculating the
// desired replica count.
Expand All @@ -263,6 +275,7 @@ message AutoscalingMetricSpec {
// * For Online Prediction:
// * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
// * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
// * `aiplatform.googleapis.com/prediction/online/request_count`
string metric_name = 1 [(google.api.field_behavior) = REQUIRED];

// The target resource utilization in percentage (1% - 100%) for the given
Expand Down
Loading