Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Runtime Assisted Mount and Management enhancements #526

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions csi.proto
Original file line number Diff line number Diff line change
Expand Up @@ -387,13 +387,19 @@ message VolumeCapability {
message MountVolume {
// The filesystem type. This field is OPTIONAL.
// An empty string is equal to an unspecified field value.
// If SP has DEFER_FS_OPS node capability and CO specifies
// defer_fs_ops = true then SP MUST pass this field
// to the container runtime that will mount the file system.
string fs_type = 1;

// The mount options that can be used for the volume. This field is
// OPTIONAL. `mount_flags` MAY contain sensitive information.
// Therefore, the CO and the Plugin MUST NOT leak this information
// to untrusted entities. The total size of this repeated field
// SHALL NOT exceed 4 KiB.
// If SP has DEFER_FS_OPS node capability and CO specifies
// defer_fs_ops = true then SP MUST pass this field
// to the container runtime that will mount the file system.
repeated string mount_flags = 2;

// If SP has VOLUME_MOUNT_GROUP node capability and CO provides
Expand All @@ -408,6 +414,27 @@ message VolumeCapability {
// both readable and writable by said mount group identifier.
// This is an OPTIONAL field.
string volume_mount_group = 3;

// If SP has DEFER_FS_OPS_WITH_SUPPLEMENTAL_GROUP node capability
// and CO provides this field then SP MUST ensure that the
// volume_supplemental_group parameter is passed as a supplemental
// Group ID that owns the file system after it has been mounted by
// the container runtime handler.
// A CO MUST NOT populate this field if defer_fs_ops is false.
// This is an OPTIONAL field.
string volume_supplemental_group = 4 [(alpha_field) = true];

// If SP has DEFER_FS_OPS_WITH_SUPPLEMENTAL_GROUP_CHANGE_POLICY node
// capability and CO provides this field then SP MUST ensure that
// the volume_supplemental_group_change_policy parameter is passed
// as the policy through which ownership by a supplemental Group ID
// is set after it has been mounted by the container runtime
// handler.
// A CO MUST NOT populate this field if defer_fs_ops is false or
// volume_supplemental_group is empty.
// This is an OPTIONAL field.
string volume_supplemental_group_change_policy = 5
[(alpha_field) = true];
}

// Specify how a volume can be accessed.
Expand Down Expand Up @@ -1383,6 +1410,13 @@ message NodePublishVolumeRequest {
// This field is OPTIONAL and MUST match the volume_context of the
// volume identified by `volume_id`.
map<string, string> volume_context = 8;

// Indicates SP MUST defer file system mount and any post-mount
// configuration operations (such as application of file system
// ownership by a supplemental group, if supported) to
// a container runtime handler.
// This field is OPTIONAL.
bool defer_fs_ops = 9;
}

message NodePublishVolumeResponse {
Expand All @@ -1401,6 +1435,11 @@ message NodeUnpublishVolumeRequest {
// system/filesystem, but, at a minimum, SP MUST accept a max path
// length of at least 128 bytes.
string target_path = 2;

// Indicates SP MUST defer file system dismount and cleanup
// to a container runtime handler.
// This field is OPTIONAL.
bool defer_fs_ops = 3;
}

message NodeUnpublishVolumeResponse {
Expand Down Expand Up @@ -1431,6 +1470,11 @@ message NodeGetVolumeStatsRequest {
// system/filesystem, but, at a minimum, SP MUST accept a max path
// length of at least 128 bytes.
string staging_target_path = 3;

// Indicates SP MUST obtain file system stats from a
// container runtime handler (that has mounted the file system).
// This field is OPTIONAL.
bool defer_fs_ops = 4;
}

message NodeGetVolumeStatsResponse {
Expand Down Expand Up @@ -1528,6 +1572,22 @@ message NodeServiceCapability {
// with provided volume group identifier during node stage
// or node publish RPC calls.
VOLUME_MOUNT_GROUP = 6;

// Indicates that Node service supports deferring file system
// mount and management operations to a container runtime handler.
DEFER_FS_OPS = 7 [(alpha_enum_value) = true];

// Indicates that Node service supports passing a supplemental
// Group ID as a post mount configuration when deferring
// file system mount to a container runtime handler.
DEFER_FS_OPS_WITH_SUPPLEMENTAL_GROUP = 8
[(alpha_enum_value) = true];

// Indicates that Node service supports passing a supplemental
// Group ID change policy as a post mount configuration when
// deferring file system mount to a container runtime handler.
DEFER_FS_OPS_WITH_SUPPLEMENTAL_GROUP_CHANGE_POLICY = 9
[(alpha_enum_value) = true];
}

Type type = 1;
Expand Down Expand Up @@ -1581,6 +1641,13 @@ message NodeGetInfoResponse {
// Indicates the node exists within the "region" "R1" and the "zone"
// "Z2".
Topology accessible_topology = 3;

// If SP has DEFER_FS_OPS node capability, a plugin MUST populate
// this field with the list of file systems that it supports. A CO
// SHOULD use this to match the deferral capabilities of a plugin
// with a container runtime handler for a workload.
// This field is OPTIONAL.
repeated string supported_file_systems = 4;
}
message NodeExpandVolumeRequest {
// The ID of the volume. This field is REQUIRED.
Expand Down Expand Up @@ -1628,6 +1695,11 @@ message NodeExpandVolumeRequest {
// section on how to use this field.
map<string, string> secrets = 6
[(csi_secret) = true, (alpha_field) = true];

// Indicates SP MUST defer file system expansion to a
// container runtime handler (that has mounted the file system).
// This field is OPTIONAL.
// NOTE: fields take the `alpha_field` option; `alpha_enum_value`
// applies to enum values only.
bool defer_fs_ops = 7 [(alpha_field) = true];
}

message NodeExpandVolumeResponse {
Expand Down
79 changes: 79 additions & 0 deletions spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -854,13 +854,19 @@ message VolumeCapability {
message MountVolume {
// The filesystem type. This field is OPTIONAL.
// An empty string is equal to an unspecified field value.
// If SP has DEFER_FS_OPS node capability and CO specifies
// defer_fs_ops = true then SP MUST pass this field
// to the container runtime that will mount the file system.
string fs_type = 1;

// The mount options that can be used for the volume. This field is
// OPTIONAL. `mount_flags` MAY contain sensitive information.
// Therefore, the CO and the Plugin MUST NOT leak this information
// to untrusted entities. The total size of this repeated field
// SHALL NOT exceed 4 KiB.
// If SP has DEFER_FS_OPS node capability and CO specifies
// defer_fs_ops = true then SP MUST pass this field
// to the container runtime that will mount the file system.
repeated string mount_flags = 2;

// If SP has VOLUME_MOUNT_GROUP node capability and CO provides
Expand All @@ -875,6 +881,27 @@ message VolumeCapability {
// both readable and writable by said mount group identifier.
// This is an OPTIONAL field.
string volume_mount_group = 3;

// If SP has DEFER_FS_OPS_WITH_SUPPLEMENTAL_GROUP node capability
// and CO provides this field then SP MUST ensure that the
// volume_supplemental_group parameter is passed as a supplemental
// Group ID that owns the file system after it has been mounted by
// the container runtime handler.
// A CO MUST NOT populate this field if defer_fs_ops is false.
// This is an OPTIONAL field.
string volume_supplemental_group = 4 [(alpha_field) = true];

// If SP has DEFER_FS_OPS_WITH_SUPPLEMENTAL_GROUP_CHANGE_POLICY node
// capability and CO provides this field then SP MUST ensure that
// the volume_supplemental_group_change_policy parameter is passed
// as the policy through which ownership by a supplemental Group ID
// is set after it has been mounted by the container runtime
// handler.
// A CO MUST NOT populate this field if defer_fs_ops is false or
// volume_supplemental_group is empty.
// This is an OPTIONAL field.
string volume_supplemental_group_change_policy = 5
[(alpha_field) = true];
}

// Specify how a volume can be accessed.
Expand Down Expand Up @@ -2349,6 +2376,13 @@ message NodePublishVolumeRequest {
// This field is OPTIONAL and MUST match the volume_context of the
// volume identified by `volume_id`.
map<string, string> volume_context = 8;

// Indicates SP MUST defer file system mount and any post-mount
// configuration operations (such as application of file system
// ownership by a supplemental group, if supported) to
// a container runtime handler.
// This field is OPTIONAL.
bool defer_fs_ops = 9;
}

message NodePublishVolumeResponse {
Expand Down Expand Up @@ -2399,6 +2433,11 @@ message NodeUnpublishVolumeRequest {
// system/filesystem, but, at a minimum, SP MUST accept a max path
// length of at least 128 bytes.
string target_path = 2;

// Indicates SP MUST defer file system dismount and cleanup
// to a container runtime handler.
// This field is OPTIONAL.
bool defer_fs_ops = 3;
}

message NodeUnpublishVolumeResponse {
Expand Down Expand Up @@ -2455,6 +2494,11 @@ message NodeGetVolumeStatsRequest {
// system/filesystem, but, at a minimum, SP MUST accept a max path
// length of at least 128 bytes.
string staging_target_path = 3;

// Indicates SP MUST obtain file system stats from a
// container runtime handler (that has mounted the file system).
// This field is OPTIONAL.
bool defer_fs_ops = 4;
}

message NodeGetVolumeStatsResponse {
Expand Down Expand Up @@ -2571,6 +2615,22 @@ message NodeServiceCapability {
// with provided volume group identifier during node stage
// or node publish RPC calls.
VOLUME_MOUNT_GROUP = 6;

// Indicates that Node service supports deferring file system
// mount and management operations to a container runtime handler.
DEFER_FS_OPS = 7 [(alpha_enum_value) = true];

// Indicates that Node service supports passing a supplemental
// Group ID as a post mount configuration when deferring
// file system mount to a container runtime handler.
DEFER_FS_OPS_WITH_SUPPLEMENTAL_GROUP = 8
[(alpha_enum_value) = true];

// Indicates that Node service supports passing a supplemental
// Group ID change policy as a post mount configuration when
// deferring file system mount to a container runtime handler.
DEFER_FS_OPS_WITH_SUPPLEMENTAL_GROUP_CHANGE_POLICY = 9
[(alpha_enum_value) = true];
}

Type type = 1;
Expand Down Expand Up @@ -2640,6 +2700,13 @@ message NodeGetInfoResponse {
// Indicates the node exists within the "region" "R1" and the "zone"
// "Z2".
Topology accessible_topology = 3;

// If SP has DEFER_FS_OPS node capability, a plugin MUST populate
// this field with the list of file systems that it supports. A CO
// SHOULD use this to match the deferral capabilities of a plugin
// with a container runtime handler for a workload.
// This field is OPTIONAL.
repeated string supported_file_systems = 4;
}
```

Expand Down Expand Up @@ -2717,6 +2784,11 @@ message NodeExpandVolumeRequest {
// section on how to use this field.
map<string, string> secrets = 6
[(csi_secret) = true, (alpha_field) = true];

// Indicates SP MUST defer file system expansion to a
// container runtime handler (that has mounted the file system).
// This field is OPTIONAL.
// NOTE: fields take the `alpha_field` option; `alpha_enum_value`
// applies to enum values only.
bool defer_fs_ops = 7 [(alpha_field) = true];
}

message NodeExpandVolumeResponse {
Expand Down Expand Up @@ -2785,6 +2857,13 @@ message NodeExpandVolumeResponse {
* Plugins SHALL NOT create additional files or directories adjacent to the UNIX socket specified by `CSI_ENDPOINT`; violations of this requirement constitute "abuse".
* The Plugin Supervisor is the ultimate authority of the directory in which the UNIX socket endpoint is created and MAY enforce policies to prevent and/or mitigate abuse of the directory by Plugins.

#### Deferring Filesystem Mount and Management to a Container Runtime Handler
A Plugin MAY have the capability to defer file system mount and management operations to a container runtime handler.
The CO SHOULD set `defer_fs_ops` to `true` in CSI Node APIs when all of the following conditions are fulfilled:
- The container runtime handler (associated with a workload) supports deferral of file system mount and management operations from a CSI plugin.
- The CSI plugin is able to support deferral of file system mount and management operations to a container runtime handler.
- The container runtime handler and the CSI plugin are compatible in their support for mounting specific file systems and applying post-mount configuration based on the workload spec (e.g. supplemental group ownership).

### Supervised Lifecycle Management

* For Plugins packaged in software form:
Expand Down