Note

Access to this page requires authorization. You can try signing in or changing directories.

Access to this page requires authorization. You can try changing directories.

Winsock explicit congestion notification (ECN)

Introduction

Some applications and/or protocols that are based on the User Datagram Protocol (UDP) (for example, QUIC) seek to leverage the use of explicit congestion notification (ECN) codepoints in order to improve latency and jitter in congested networks.

The Winsock ECN APIs extend the getsockopt/setsockopt interface—as well as the WSASendMsg/LPFN_WSARECVMSG (WSARecvMsg) control message interface—with support for modifying and receiving ECN codepoints in IP headers. The functionality provided allows you to get and set ECN codepoints on a per-packet basis.

For more information regarding ECN, see RFC 3168, The Addition of Explicit Congestion Notification (ECN) to IP.

Your application isn't allowed to specify the Congestion Encountered (CE) codepoint when sending datagrams. If it does, the send fails with error WSAEINVAL.

Query ECN with WSAGetRecvIPEcn

Important

WSAGetRecvIPEcn is deprecated and does not properly support dual-stack sockets. Use the IP_RECVECN and IPV6_RECVECN socket options directly with getsockopt instead. On a dual-stack socket that is unbound or bound to a wildcard address, applications need to get both IP_RECVECN (level IPPROTO_IP) and IPV6_RECVECN (level IPPROTO_IPV6) separately. If the socket is bound to a specific IPv6 address, only the IPV6_RECVECN option should be retrieved. If the socket is bound to an IPv4-mapped IPv6 address (for example, ::ffff:192.0.2.1), only the IP_RECVECN option should be retrieved.

WSAGetRecvIPEcn is an inline function, defined in ws2tcpip.h.

Call WSAGetRecvIPEcn to query the current enablement of receiving the IP_ECN (or IPV6_ECN) control message via LPFN_WSARECVMSG (WSARecvMsg).

Also see the WSAMSG structure.

  • Protocol: IPv4

  • Cmsg_level: IPPROTO_IP

  • Cmsg_type: IP_ECN (50 decimal)

  • Description: Specifies/receives ECN codepoint in the Type of Service (TOS) IPv4 header field.

  • Protocol: IPv6

  • Cmsg_level: IPPROTO_IPV6

  • Cmsg_type: IPV6_ECN (50 decimal)

  • Description: Specifies/receives ECN codepoint in the Traffic Class IPv6 header field.

Specify ECN with WSASetRecvIPEcn

Important

WSASetRecvIPEcn is deprecated and does not properly support dual-stack sockets. Use the IP_RECVECN and IPV6_RECVECN socket options directly with setsockopt instead. On a dual-stack socket that is unbound or bound to a wildcard address, applications need to set both IP_RECVECN (level IPPROTO_IP) and IPV6_RECVECN (level IPPROTO_IPV6) separately. If the socket is bound to a specific IPv6 address, only the IPV6_RECVECN option should be set. If the socket is bound to an IPv4-mapped IPv6 address (for example, ::ffff:192.0.2.1), only the IP_RECVECN option should be set.

WSASetRecvIPEcn is an inline function, defined in ws2tcpip.h.

Call WSASetRecvIPEcn to specify whether the IP stack should populate the control buffer with a message containing the ECN codepoint of the Type of Service IPv4 header field (or Traffic Class IPv6 header field) on a received datagram. When set to TRUE, the LPFN_WSARECVMSG (WSARecvMsg) function returns optional control data containing the ECN codepoint of the received datagram. The returned control message type will be IP_ECN (or IPV6_ECN) with level IPPROTO_IP (or IPPROTO_IPV6). The control message data is returned as an INT. This option is valid only on datagram sockets (the socket type must be SOCK_DGRAM).

Code example 1—application advertising ECN support

#define ECN_ECT_0 2

/*
 * Issues a single sendmsg call on `sock` toward `addr`, carrying `datalen`
 * bytes from `data` plus one control message whose INT payload is ECN_ECT_0.
 *
 * The control message uses IPPROTO_IP / IP_ECN when addr->ss_family is
 * AF_INET, and IPPROTO_IPV6 / IPV6_ECN for every other family.
 *
 * `sendmsg` is the caller-supplied WSASendMsg function pointer. A failure is
 * only reported with printf; nothing is returned to the caller.
 */
void sendEcn(SOCKET sock, PSOCKADDR_STORAGE addr, LPFN_WSASENDMSG sendmsg, PCHAR data, INT datalen)
{
    CHAR ancillary[WSA_CMSG_SPACE(sizeof(INT))] = { 0 };
    WSABUF payload;
    WSAMSG msg;
    PCMSGHDR hdr;
    DWORD bytesSent;
    INT rc;

    payload.buf = data;
    payload.len = datalen;

    // Destination, one data buffer, and the ancillary buffer for the ECN value.
    msg.name = (PSOCKADDR)addr;
    msg.namelen = (INT)INET_SOCKADDR_LENGTH(addr->ss_family);
    msg.lpBuffers = &payload;
    msg.dwBufferCount = 1;
    msg.Control.buf = ancillary;
    msg.Control.len = sizeof(ancillary);
    msg.dwFlags = 0;

    // Fill in the one control header that the ancillary buffer has room for.
    hdr = WSA_CMSG_FIRSTHDR(&msg);
    hdr->cmsg_len = WSA_CMSG_LEN(sizeof(INT));
    if (addr->ss_family == AF_INET) {
        hdr->cmsg_level = IPPROTO_IP;
        hdr->cmsg_type = IP_ECN;
    } else {
        hdr->cmsg_level = IPPROTO_IPV6;
        hdr->cmsg_type = IPV6_ECN;
    }
    *(PINT)WSA_CMSG_DATA(hdr) = ECN_ECT_0;

    rc = sendmsg(sock, &msg, 0, &bytesSent, NULL, NULL);
    if (rc == SOCKET_ERROR) {
        printf("sendmsg failed %d\n", WSAGetLastError());
    }
}

Code example 2—application detecting congestion

#define ECN_ECT_CE 3

/*
 * Issues a single recvmsg call on `sock`, receiving up to `datalen` bytes
 * into `data`, and inspects the returned control messages for an ECN value.
 *
 * *congestionEncountered is set to FALSE before the receive. It becomes TRUE
 * only when the first IP_ECN (level IPPROTO_IP) or IPV6_ECN (level
 * IPPROTO_IPV6) control message holds ECN_ECT_CE.
 *
 * `recvmsg` is the caller-supplied WSARecvMsg function pointer. `addr` is
 * accepted but not used by this function.
 *
 * Returns -1 (after printing the error) when recvmsg fails, otherwise the
 * byte count that recvmsg reported.
 */
int recvEcn(SOCKET sock, PSOCKADDR_STORAGE addr, LPFN_WSARECVMSG recvmsg, PCHAR data, INT datalen, PBOOLEAN congestionEncountered)
{
    SOCKADDR_STORAGE peer = { 0 };
    CHAR ancillary[WSA_CMSG_SPACE(sizeof(INT))] = { 0 };
    WSABUF payload;
    WSAMSG msg;
    PCMSGHDR hdr;
    DWORD bytesReceived;

    *congestionEncountered = FALSE;

    payload.buf = data;
    payload.len = datalen;

    msg.name = (PSOCKADDR)&peer;
    msg.namelen = sizeof(peer);
    msg.lpBuffers = &payload;
    msg.dwBufferCount = 1;
    msg.Control.buf = ancillary;
    msg.Control.len = sizeof(ancillary);
    msg.dwFlags = 0;

    if (recvmsg(sock, &msg, &bytesReceived, NULL, NULL) == SOCKET_ERROR) {
        printf("recvmsg failed %d\n", WSAGetLastError());
        return -1;
    }

    // Walk the control messages; only the first ECN message is examined.
    for (hdr = WSA_CMSG_FIRSTHDR(&msg); hdr != NULL; hdr = WSA_CMSG_NXTHDR(&msg, hdr)) {
        if (!(hdr->cmsg_level == IPPROTO_IP && hdr->cmsg_type == IP_ECN) &&
            !(hdr->cmsg_level == IPPROTO_IPV6 && hdr->cmsg_type == IPV6_ECN)) {
            continue;
        }

        if (*(PINT)WSA_CMSG_DATA(hdr) == ECN_ECT_CE) {
            *congestionEncountered = TRUE;
        }
        break;
    }

    return bytesReceived;
}

/*
 * Binds `sock` to `addr`, enables delivery of ECN control messages for the
 * address families the socket can receive, then calls recvEcn in a loop
 * until it returns a value that is not positive (-1 on error, or 0).
 *
 * Which receive-ECN options are enabled:
 *   - AF_INET address                     -> IP_RECVECN
 *   - AF_INET6, IPv4-mapped address       -> IP_RECVECN
 *   - AF_INET6, any address (::)          -> IPV6_RECVECN, plus IP_RECVECN
 *                                            when IPV6_V6ONLY reads as 0
 *   - AF_INET6, any other address         -> IPV6_RECVECN
 *
 * `recvmsg` is the caller-supplied WSARecvMsg function pointer and is passed
 * through to recvEcn. Every failure is reported with printf and ends the
 * function; nothing is returned to the caller.
 */
void receiver(SOCKET sock, PSOCKADDR_STORAGE addr, LPFN_WSARECVMSG recvmsg)
{
    // Signed on purpose: recvEcn returns -1 on failure. Held in a DWORD that
    // value wraps to 0xFFFFFFFF, the `> 0` loop test below stays true, and a
    // persistent receive error turns into an endless loop.
    INT numBytes;
    INT error;
    DWORD enabled = TRUE;
    BOOLEAN enableV4 = FALSE;
    BOOLEAN enableV6 = FALSE;
    CHAR data[512];
    BOOLEAN congestionEncountered;

    error = bind(sock, (PSOCKADDR)addr, sizeof(*addr));
    if (error == SOCKET_ERROR) {
        printf("bind failed %d\n", WSAGetLastError());
        return;
    }

    // Decide which option(s) are needed from the address the socket is bound to.
    if (addr->ss_family == AF_INET || INETADDR_ISV4MAPPED((PSOCKADDR)addr)) {
        // IPv4 socket, or AF_INET6 socket bound to an IPv4-mapped address
        // (e.g. ::ffff:192.0.2.1): only IP_RECVECN is needed.
        enableV4 = TRUE;
    } else if (INETADDR_ISANY((PSOCKADDR)addr)) {
        // AF_INET6 socket bound to the any address (::). Check
        // IPV6_V6ONLY to determine if the socket is dual-stack.
        DWORD v6Only;
        INT v6OnlyLen = sizeof(v6Only);

        error =
            getsockopt(
                sock,
                IPPROTO_IPV6,
                IPV6_V6ONLY,
                (PCHAR)&v6Only,
                &v6OnlyLen);
        if (error == SOCKET_ERROR) {
            printf("getsockopt(IPV6_V6ONLY) failed %d\n", WSAGetLastError());
            return;
        }

        // Always IPV6_RECVECN for v6 traffic; a dual-stack socket also
        // needs IP_RECVECN for v4 traffic.
        enableV6 = TRUE;
        enableV4 = (v6Only == 0) ? TRUE : FALSE;
    } else {
        // AF_INET6 socket bound to a specific IPv6 address: v6 only.
        enableV6 = TRUE;
    }

    // IPv6 option first, then IPv4, so a dual-stack socket sets them in the
    // same order as before.
    if (enableV6) {
        error =
            setsockopt(
                sock,
                IPPROTO_IPV6,
                IPV6_RECVECN,
                (PCHAR)&enabled,
                sizeof(enabled));
        if (error == SOCKET_ERROR) {
            printf("setsockopt(IPV6_RECVECN) failed %d\n", WSAGetLastError());
            return;
        }
    }

    if (enableV4) {
        error =
            setsockopt(
                sock,
                IPPROTO_IP,
                IP_RECVECN,
                (PCHAR)&enabled,
                sizeof(enabled));
        if (error == SOCKET_ERROR) {
            printf("setsockopt(IP_RECVECN) failed %d\n", WSAGetLastError());
            return;
        }
    }

    do {
        numBytes = recvEcn(sock, addr, recvmsg, data, sizeof(data), &congestionEncountered);
        if (congestionEncountered) {
            // Tell sender to slow down
        }
    } while (numBytes > 0);
}

See also


Feedback

Was this page helpful?

Additional resources