diff --git a/doc/SAI-Proposal-Packet-Trimming.md b/doc/SAI-Proposal-Packet-Trimming.md new file mode 100644 index 000000000..e4b18ed62 --- /dev/null +++ b/doc/SAI-Proposal-Packet-Trimming.md @@ -0,0 +1,165 @@ +# Switch Abstraction Interface Change Proposal for Packet Trimming + +Title | Packet Trimming +------------|---------------- +Authors | Nvidia +Status | In review +Type | Standards track +Created | 8/28/2024 +SAI-Version | 1.14 +---------- + +## Overview +When a lossy queue exceeds a buffer threshold, it drops packets without any notification to the destination host. + +When a packet is lost, it can be recovered through fast retransmission (e.g., Go-Back-N in RoCE) or by using timeouts. Retransmission triggered by timeouts typically incurs significant latency. Packet trimming aims to facilitate rapid packet loss notification and, consequently, eliminate slow timeout-based retransmissions. + +To help the host recover data more quickly and accurately, we introduce a packet trimming feature that, upon a failed packet admission to a shared buffer, +trims the packet to a configured size and tries sending it on a different queue to deliver a packet drop notification to the end host. 
+ +``` + + ┌───────────────┐ + │ │ + │Trimmed packet │ + │ │ + └───────────────┘ + + ┌─┬─┬─┬─┬────────┐ + │ │ │ │ │ │ + │ │ │ │ │ │ + ┌────────────────► │ │ │ │ │ + │ │ │ │ │ │ │ Queue + │ │ │ │ │ │ │ + │ │ │ │ │ │ │ + │ └─┴─┴─┴─┴────────┘ + ┌──────────────┐ │ + │ │ ┌──────────────────────────────────────────────────────┐ │ ┌─┬─┬─┬─┬─┬─┬─┬─┬┐ + │ │ │ │ │ │ │ │ │ │ │ │ │ ││ + │ │ │ │ │ \ / │ │ │ │ │ │ │ │ ││ + │ │ │ │ │ \ / │ │ │ │ │ │ │ │ ││ + │ Packet │ │ Pipeline ┼────┼───────\────────► │ │ │ │ │ │ │ ││ Queue + │ │ │ │ / \ │ │ │ │ │ │ │ │ ││ + │ │ │ │ / \ │ │ │ │ │ │ │ │ ││ + │ │ └──────────────────────────────────────────────────────┘ └─┴─┴─┴─┴─┴─┴─┴─┴┘ + │ │ + │ │ + │ │ + └──────────────┘ +``` + +This feature assumes that forwarding tables are configured properly, and the original packet would be delivered to the destination successfully if not for the congestion. + +## Spec +There is a tradeoff between trying to configure a higher threshold in a queue buffer profile and trimming the packet. + +If the user chooses to configure higher thresholds for queues, the probability of a drop on a particular queue is lower only if other ports are less congested at the moment. + +However, if all the ports are equally utilized, it makes sense to create a different buffer profile for these queues, with a stricter threshold to have more fairness in shared buffer. + +A static trimming threshold may not be effective with shared buffer switches, where the buffer resources allocated to a queue or port can vary over time. Therefore, we propose adding a new attribute to a buffer profile to allow configuring packet trimming on such stricter profiles: +``` +/** + * @brief Enum defining queue actions in case the packet fails to pass the admission control. + */ +typedef enum _sai_buffer_profile_packet_admission_fail_action_t +{ + /** + * @brief Drop the packet. + * + * Default action. Packet has nowhere to go + * and will be dropped. 
+ */ + SAI_BUFFER_PROFILE_PACKET_ADMISSION_FAIL_ACTION_DROP, + + /** + * @brief Trim the packet. + * + * Try sending a shortened packet over a different + * queue. The original packet will be dropped and a trimmed copy of the packet will be sent. + * The IP length and checksum fields will be updated in the trimmed copy. + * SAI_QUEUE_STAT_DROPPED_PACKETS as well as SAI_QUEUE_STAT_DROPPED_BYTES + * will count the original discarded frames even if they will be trimmed afterwards. + * Interface statistics must show dropped packets. + * Interface statistics may show sent trimmed packets. + */ + SAI_BUFFER_PROFILE_PACKET_ADMISSION_FAIL_ACTION_DROP_AND_TRIM, +} sai_buffer_profile_packet_admission_fail_action_t; +``` +``` + /** + * @brief Buffer profile discard action + * + * Action to be taken upon packet discard due to + * buffer profile configuration. Applicable only + * when attached to a queue. + * + * @type sai_buffer_profile_packet_admission_fail_action_t + * @flags CREATE_AND_SET + * @default SAI_BUFFER_PROFILE_PACKET_ADMISSION_FAIL_ACTION_DROP + */ + SAI_BUFFER_PROFILE_ATTR_PACKET_ADMISSION_FAIL_ACTION, +``` + +Trimming engine attributes are configured globally. 
+``` + /** + * @brief Trim packets to this size to reduce bandwidth + * + * @type sai_uint32_t + * @flags CREATE_AND_SET + * @default 128 + */ + SAI_SWITCH_ATTR_PACKET_TRIMMING_SIZE, + + /** + * @brief New packet trimming DSCP value + * + * @type sai_uint8_t + * @flags CREATE_AND_SET + * @default 0 + */ + SAI_SWITCH_ATTR_PACKET_TRIMMING_DSCP_VALUE, + + /** + * @brief Is the new queue index for a trimmed packet mapped from DSCP + * + * @type sai_bool_t + * @flags READ_ONLY + */ + SAI_SWITCH_ATTR_PACKET_TRIMMING_QUEUE_INDEX_MAPPED_FROM_DSCP, + + /** + * @brief New packet trimming queue index + * + * @type sai_uint8_t + * @flags CREATE_AND_SET + * @default 0 + * @validonly SAI_SWITCH_ATTR_PACKET_TRIMMING_QUEUE_INDEX_MAPPED_FROM_DSCP == false + */ + SAI_SWITCH_ATTR_PACKET_TRIMMING_QUEUE_INDEX, +``` + +If more granularity is needed (e.g., trimming only a specific protocol, or only certain packets within a protocol), an ACL action is added to disable trimming even if the packet is eligible due to a queue with a buffer profile attached that has trimming enabled. +``` + /** + * @brief Disable packet trimming for a given match condition. + * + * This rule takes effect only when packet trimming is configured on a buffer profile of a queue to which a packet belongs. + * + * @type sai_acl_action_data_t bool + * @flags CREATE_AND_SET + * @default disabled + */ + SAI_ACL_ENTRY_ATTR_ACTION_DISABLE_TRIMMING = SAI_ACL_ENTRY_ATTR_ACTION_START + 0x39, +``` + +Both the queue and the port have a packet counter that reflects the number of trimmed packets. 
+``` + /** Packets trimmed due to failed admission [uint64_t] */ + SAI_QUEUE_STAT_TRIMMED_PACKETS = 0x00000028, +``` +``` + /** Packets trimmed due to failed shared buffer admission [uint64_t] */ + SAI_PORT_STAT_TRIMMED_PACKETS, +``` diff --git a/inc/saiacl.h b/inc/saiacl.h index df92e7bff..8dee303dd 100644 --- a/inc/saiacl.h +++ b/inc/saiacl.h @@ -288,6 +288,9 @@ typedef enum _sai_acl_action_type_t /** Next Chain Group */ SAI_ACL_ACTION_TYPE_CHAIN_REDIRECT = 0x00000038, + + /** Disable packet trimming */ + SAI_ACL_ACTION_TYPE_DISABLE_TRIMMING = 0x00000039, } sai_acl_action_type_t; /** @@ -3221,10 +3224,21 @@ typedef enum _sai_acl_entry_attr_t */ SAI_ACL_ENTRY_ATTR_ACTION_CHAIN_REDIRECT = SAI_ACL_ENTRY_ATTR_ACTION_START + 0x38, + /** + * @brief Disable packet trimming for a given match condition. + * + * This rule takes effect only when packet trimming is configured on a buffer profile of a queue to which a packet belongs. + * + * @type sai_acl_action_data_t bool + * @flags CREATE_AND_SET + * @default disabled + */ + SAI_ACL_ENTRY_ATTR_ACTION_DISABLE_TRIMMING = SAI_ACL_ENTRY_ATTR_ACTION_START + 0x39, + /** * @brief End of Rule Actions */ - SAI_ACL_ENTRY_ATTR_ACTION_END = SAI_ACL_ENTRY_ATTR_ACTION_CHAIN_REDIRECT, + SAI_ACL_ENTRY_ATTR_ACTION_END = SAI_ACL_ENTRY_ATTR_ACTION_DISABLE_TRIMMING, /** * @brief End of ACL Entry attributes diff --git a/inc/saibuffer.h b/inc/saibuffer.h index 6f87cc5b2..c4672c1c0 100644 --- a/inc/saibuffer.h +++ b/inc/saibuffer.h @@ -590,6 +590,33 @@ typedef enum _sai_buffer_profile_threshold_mode_t } sai_buffer_profile_threshold_mode_t; +/** + * @brief Enum defining queue actions in case the packet fails to pass the admission control. + */ +typedef enum _sai_buffer_profile_packet_admission_fail_action_t +{ + /** + * @brief Drop the packet. + * + * Default action. Packet has nowhere to go + * and will be dropped. + */ + SAI_BUFFER_PROFILE_PACKET_ADMISSION_FAIL_ACTION_DROP, + + /** + * @brief Trim the packet. 
+ * + * Try sending a shortened packet over a different + * queue. The original packet will be dropped and a trimmed copy of the packet will be sent. + * The IP length and checksum fields will be updated in the trimmed copy. + * SAI_QUEUE_STAT_DROPPED_PACKETS as well as SAI_QUEUE_STAT_DROPPED_BYTES + * will count the original discarded frames even if they will be trimmed afterwards. + * Interface statistics must show dropped packets. + * Interface statistics may show sent trimmed packets. + */ + SAI_BUFFER_PROFILE_PACKET_ADMISSION_FAIL_ACTION_DROP_AND_TRIM, +} sai_buffer_profile_packet_admission_fail_action_t; + /** * @brief Enum defining buffer profile attributes. */ @@ -711,6 +738,19 @@ typedef enum _sai_buffer_profile_attr_t */ SAI_BUFFER_PROFILE_ATTR_XON_OFFSET_TH, + /** + * @brief Buffer profile discard action + * + * Action to be taken upon packet discard due to + * buffer profile configuration. Applicable only + * when attached to a queue. + * + * @type sai_buffer_profile_packet_admission_fail_action_t + * @flags CREATE_AND_SET + * @default SAI_BUFFER_PROFILE_PACKET_ADMISSION_FAIL_ACTION_DROP + */ + SAI_BUFFER_PROFILE_ATTR_PACKET_ADMISSION_FAIL_ACTION, + /** * @brief End of attributes */ diff --git a/inc/saiport.h b/inc/saiport.h index 2eb6961c5..23862a5a2 100644 --- a/inc/saiport.h +++ b/inc/saiport.h @@ -3304,6 +3304,9 @@ typedef enum _sai_port_stat_t /** Count of total bits corrected by FEC. Counter will increment monotonically. 
*/ SAI_PORT_STAT_IF_IN_FEC_CORRECTED_BITS, + /** Packets trimmed due to failed shared buffer admission [uint64_t] */ + SAI_PORT_STAT_TRIMMED_PACKETS, + /** Port stat in drop reasons range start */ SAI_PORT_STAT_IN_DROP_REASON_RANGE_BASE = 0x00001000, diff --git a/inc/saiqueue.h b/inc/saiqueue.h index b7192a861..3aca453fe 100644 --- a/inc/saiqueue.h +++ b/inc/saiqueue.h @@ -418,6 +418,9 @@ typedef enum _sai_queue_stat_t /** Queue delay watermark in nanoseconds [uint64_t] */ SAI_QUEUE_STAT_DELAY_WATERMARK_NS = 0x00000027, + /** Packets trimmed due to failed admission [uint64_t] */ + SAI_QUEUE_STAT_TRIMMED_PACKETS = 0x00000028, + /** Custom range base value */ SAI_QUEUE_STAT_CUSTOM_RANGE_BASE = 0x10000000 diff --git a/inc/saiswitch.h b/inc/saiswitch.h index 1c13bc24c..ad2ecb235 100644 --- a/inc/saiswitch.h +++ b/inc/saiswitch.h @@ -610,6 +610,30 @@ typedef enum _sai_switch_hostif_oper_status_update_mode_t } sai_switch_hostif_oper_status_update_mode_t; +/** + * @brief Attribute data for SAI_SWITCH_ATTR_PACKET_TRIMMING_QUEUE_RESOLUTION_MODE. + */ +typedef enum _sai_packet_trimming_queue_resolution_mode_t +{ + /** + * @brief Static queue resolution. + * + * In this mode, a new queue for the trimmed packet is set directly + * by the application. + */ + SAI_PACKET_TRIMMING_QUEUE_RESOLUTION_MODE_STATIC, + + /** + * @brief Dynamic queue resolution. + * + * In this mode, a new queue for the trimmed packet is resolved + * using QOS maps, applied to a new DSCP value that was provided + * for a trimmed packet. + */ + SAI_PACKET_TRIMMING_QUEUE_RESOLUTION_MODE_DYNAMIC, + +} sai_packet_trimming_queue_resolution_mode_t; + /** * @brief Attribute Id in sai_set_switch_attribute() and * sai_get_switch_attribute() calls. 
@@ -3070,6 +3094,43 @@ typedef enum _sai_switch_attr_t */ SAI_SWITCH_ATTR_SELECTIVE_COUNTER_LIST, + /** + * @brief Trim packets to this size to reduce bandwidth + * + * @type sai_uint32_t + * @flags CREATE_AND_SET + * @default 128 + */ + SAI_SWITCH_ATTR_PACKET_TRIMMING_SIZE, + + /** + * @brief New packet trimming DSCP value + * + * @type sai_uint8_t + * @flags CREATE_AND_SET + * @default 0 + */ + SAI_SWITCH_ATTR_PACKET_TRIMMING_DSCP_VALUE, + + /** + * @brief Queue mapping mode for a trimmed packet + * + * @type sai_packet_trimming_queue_resolution_mode_t + * @flags CREATE_AND_SET + * @default SAI_PACKET_TRIMMING_QUEUE_RESOLUTION_MODE_STATIC + */ + SAI_SWITCH_ATTR_PACKET_TRIMMING_QUEUE_RESOLUTION_MODE, + + /** + * @brief New packet trimming queue index + * + * @type sai_uint8_t + * @flags CREATE_AND_SET + * @default 0 + * @validonly SAI_SWITCH_ATTR_PACKET_TRIMMING_QUEUE_RESOLUTION_MODE == SAI_PACKET_TRIMMING_QUEUE_RESOLUTION_MODE_STATIC + */ + SAI_SWITCH_ATTR_PACKET_TRIMMING_QUEUE_INDEX, + /** * @brief End of attributes */