From 15cee382a6b52e28c25b844c9fb6af2a76fd2ff5 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Tue, 17 Sep 2024 14:16:15 -0700 Subject: [PATCH 1/9] Expose the max number of TCP connections as a separate constant. The maximum number of TCP connections is set in an indirect way through `ipconfigTCP_WIN_SEG_COUNT` (see documentation in `FreeRTOSIPConfig.h`). However we need to use it at other places in the code base. Factor it out of `ipconfigTCP_WIN_SEG_COUNT` into a separate constant. Signed-off-by: Hugo Lefeuvre --- lib/tcpip/FreeRTOSIPConfig.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/tcpip/FreeRTOSIPConfig.h b/lib/tcpip/FreeRTOSIPConfig.h index 7851021..29ce9b3 100644 --- a/lib/tcpip/FreeRTOSIPConfig.h +++ b/lib/tcpip/FreeRTOSIPConfig.h @@ -48,11 +48,12 @@ * footprint of 9,984 bytes. * * Note that the pool of descriptors is reallocated at every network stack - * reset. Setting this number to a large value (e.g., 256, the FreeRTOS+TCP - * default) is known to cause reset failures due to fragmentation preventing us - * from reallocating the pool. + * reset. Setting the number of segments to a large value (e.g., 256, the + * FreeRTOS+TCP default) is known to cause reset failures due to fragmentation + * preventing us from reallocating the pool. */ -#define ipconfigTCP_WIN_SEG_COUNT 96 +#define MAX_SIMULTANEOUS_TCP_CONNECTIONS 6 +#define ipconfigTCP_WIN_SEG_COUNT (MAX_SIMULTANEOUS_TCP_CONNECTIONS * 16) /** * Enable counting semaphore functionality in the build, as this is necessary From dafd82bba97db921e64c4b0e42ddb700a3702122 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Thu, 12 Sep 2024 15:05:05 -0700 Subject: [PATCH 2/9] Add the concept of server ports to the firewall. A server port is a local TCP port which is listened to by an application. Any new TCP connection to a local server port (detected through an incoming SYN) causes the creation of a firewall entry. There is no equivalent concept for UDP, as we do not currently have any use-case for a UDP server that cannot be handled with custom rules (NTP, DNS, DHCP, QUIC). This commit also fixes the documentation of other firewall APIs which do not specify which compartment they should be called from. Signed-off-by: Hugo Lefeuvre --- lib/firewall/firewall.cc | 191 ++++++++++++++++++++++++++++++++++- lib/firewall/firewall.hh | 150 +++++++++++++++++++++++++-- lib/netapi/NetAPI.cc | 4 +- lib/tcpip/network_wrapper.cc | 4 +- 4 files changed, 330 insertions(+), 19 deletions(-) diff --git a/lib/firewall/firewall.cc b/lib/firewall/firewall.cc index 6a8924c..a111739 100644 --- a/lib/firewall/firewall.cc +++ b/lib/firewall/firewall.cc @@ -580,13 +580,64 @@ namespace } } __packed; + static_assert(sizeof(IPv4Header) == 20); + struct TCPUDPCommonPrefix { uint16_t sourcePort; uint16_t destinationPort; } __packed; - static_assert(sizeof(IPv4Header) == 20); + struct TCPHeader + { + /** + * Source port. + */ + uint16_t sourcePort; + /** + * Destination port. + */ + uint16_t destinationPort; + /** + * Sequence number. + */ + uint32_t sequenceNumber; + /** + * Acknowledgement number. + */ + uint32_t acknowledgementNumber; + /** + * Reserved bits, data offset, and flags. + */ + uint16_t bitfield; + /** + * Window size. + */ + uint16_t windowSize; + /** + * Checksum. + */ + uint16_t checksum; + /** + * Urgent pointer. + */ + uint16_t urgentPointer; + } __packed; + + /** + * Masks to extract the value of the SYN and ACK flags in the bitfield + * of a TCP header (`TCPHeader.bitfield`). + * + * Sequence of bits in the bitfield (in network endianness): + * dataOffset : 4 + * reserved : 4 + * cwr : 1, ece : 1, urg : 1, ack : 1 + * psh : 1, rst : 1, syn : 1, fin : 1 + */ + static constexpr const uint16_t TCPBitfieldACKMask = 0x0010; + static constexpr const uint16_t TCPBitfieldSYNMask = 0x0002; + + static_assert(sizeof(TCPHeader) == 20); /** * Simple firewall table for IPv4 endpoints. @@ -602,7 +653,7 @@ namespace class EndpointsTable { /** - * A permitted tuple (source and destination address and port). + * A permitted tuple (source and destination address and port). * * We assume a single local address, so the local address is not stored. */ @@ -615,6 +666,7 @@ namespace // = default. auto operator<=>(const ConnectionTuple &) const = default; // NOLINT }; + SmallTable tcpServerPorts; SmallTable permittedTCPEndpoints; SmallTable permittedUDPEndpoints; FlagLockPriorityInherited permittedEndpointsLock; @@ -647,6 +699,7 @@ namespace auto guardedTable = permitted_endpoints(protocol); auto &[g, table] = guardedTable; table.clear(); + tcpServerPorts.clear(); } void remove_endpoint(IPProtocolNumber protocol, @@ -664,6 +717,24 @@ namespace table.remove(tuple); } + void add_server_port(uint16_t localPort) + { + LockGuard g{permittedEndpointsLock}; + tcpServerPorts.insert(localPort); + } + + void remove_server_port(uint16_t localPort) + { + LockGuard g{permittedEndpointsLock}; + tcpServerPorts.remove(localPort); + } + + bool is_server_port(uint16_t localPort) + { + LockGuard g{permittedEndpointsLock}; + return tcpServerPorts.contains(localPort); + } + void add_endpoint(IPProtocolNumber protocol, Address remoteAddress, uint16_t localPort, @@ -736,6 +807,14 @@ namespace uint32_t dnsServerAddress; _Atomic(uint32_t) dnsIsPermitted; + /** + * `currentClientCount` keeps track of the current number of open + * client connections. When `currentClientCount` reaches + * `FirewallMaximumNumberOfClients`, new incoming TCP connections are + * dropped. See `FirewallMaximumNumberOfClients`. + */ + _Atomic(uint8_t) currentClientCount = 0; + bool packet_filter_ipv4(const uint8_t *data, size_t length, uint32_t(IPv4Header::*remoteAddress), @@ -813,6 +892,55 @@ namespace static_cast(endpoint >> 24) & 0xff); return true; } + // First SYN to a local server port should + // trigger creation of a firewall entry. We + // must do this after checking the table to + // avoid creating an entry multiple times if we + // get multiple SYNs from the same client + // (e.g., retransmissions). + if ((isIngress) && + (ipv4Header->protocol == IPProtocolNumber::TCP) && + (EndpointsTable::instance().is_server_port( + localPortNumber))) + { + if (ipv4Header->body_offset() + sizeof(TCPHeader) > length) + { + Debug::log("Dropping truncated TCP packet of length {}", + length); + return false; + } + auto *tcpHeader = + reinterpret_cast(tcpudpHeader); + if (((ntohs(tcpHeader->bitfield) & TCPBitfieldSYNMask) != + 0) && + ((ntohs(tcpHeader->bitfield) & TCPBitfieldACKMask) == + 0)) + { + if (currentClientCount + 1 >= + FirewallMaximumNumberOfClients) + { + // Maximum number of client connections + // reached. + Debug::log("Maximum number of clients reached, " + "dropping TCP SYN"); + return false; + } + currentClientCount++; + Debug::log("Permitting new client TCP connection from " + "{}.{}.{}.{}:{}", + static_cast(endpoint) & 0xff, + static_cast(endpoint >> 8) & 0xff, + static_cast(endpoint >> 16) & 0xff, + static_cast(endpoint >> 24) & 0xff, + static_cast(ntohs(remotePortNumber))); + EndpointsTable::instance().add_endpoint( + IPProtocolNumber::TCP, + endpoint, + localPortNumber, + remotePortNumber); + return true; + } + } // Permit DHCP replies if (is_dhcp_reply(ipv4Header->protocol, isIngress, @@ -1015,6 +1143,16 @@ void firewall_permit_dns(bool dnsIsPermitted) ::dnsIsPermitted += dnsIsPermitted ? 1 : -1; } +void firewall_add_tcpipv4_server_port(uint16_t localPort) +{ + EndpointsTable::instance().add_server_port(localPort); +} + +void firewall_remove_tcpipv4_server_port(uint16_t localPort) +{ + EndpointsTable::instance().remove_server_port(localPort); +} + void firewall_add_tcpipv4_endpoint(uint32_t remoteAddress, uint16_t localPort, uint16_t remotePort) @@ -1031,12 +1169,29 @@ void firewall_add_udpipv4_endpoint(uint32_t remoteAddress, IPProtocolNumber::UDP, remoteAddress, localPort, remotePort); } -void firewall_remove_tcpipv4_endpoint(uint16_t localPort) +void firewall_remove_tcpipv4_local_endpoint(uint16_t localPort) { + // Server ports are likely to be associated to more than one entry in + // the firewall. + Debug::Assert( + !EndpointsTable::instance().is_server_port(localPort), + "Trying to remove a local endpoint on a server port."); EndpointsTable::instance().remove_endpoint(IPProtocolNumber::TCP, localPort); } +void firewall_remove_tcpipv4_remote_endpoint(uint32_t remoteAddress, + uint16_t localPort, + uint16_t remotePort) +{ + EndpointsTable::instance().remove_endpoint( + IPProtocolNumber::TCP, remoteAddress, localPort, remotePort); + if (EndpointsTable::instance().is_server_port(localPort)) + { + currentClientCount--; + } +} + void firewall_remove_udpipv4_local_endpoint(uint16_t localPort) { EndpointsTable::instance().remove_endpoint(IPProtocolNumber::UDP, @@ -1105,6 +1260,16 @@ namespace } // namespace #if CHERIOT_RTOS_OPTION_IPv6 +void firewall_add_tcpipv6_server_port(uint16_t localPort) +{ + EndpointsTable::instance().add_server_port(localPort); +} + +void firewall_remove_tcpipv6_server_port(uint16_t localPort) +{ + EndpointsTable::instance().remove_server_port(localPort); +} + void firewall_add_tcpipv6_endpoint(uint8_t *remoteAddress, uint16_t localPort, uint16_t remotePort) @@ -1127,12 +1292,30 @@ void firewall_add_udpipv6_endpoint(uint8_t *remoteAddress, } } -void firewall_remove_tcpipv6_endpoint(uint16_t localPort) +void firewall_remove_tcpipv6_local_endpoint(uint16_t localPort) { + Debug::Assert( + !EndpointsTable::instance().is_server_port(localPort), + "Trying to remove a local endpoint on a server port."); EndpointsTable::instance().remove_endpoint( IPProtocolNumber::TCP, localPort); } +void firewall_remove_tcpipv6_remote_endpoint(uint8_t *remoteAddress, + uint16_t localPort, + uint16_t remotePort) +{ + if (auto copy = copy_address(remoteAddress)) + { + EndpointsTable::instance().remove_endpoint( + IPProtocolNumber::TCP, *copy, localPort, remotePort); + if (EndpointsTable::instance().is_server_port(localPort)) + { + currentClientCount--; + } + } +} + void firewall_remove_udpipv6_local_endpoint(uint16_t localPort) { EndpointsTable::instance().remove_endpoint( diff --git a/lib/firewall/firewall.hh b/lib/firewall/firewall.hh index 058932d..870fe1c 100644 --- a/lib/firewall/firewall.hh +++ b/lib/firewall/firewall.hh @@ -8,6 +8,8 @@ /** * Send a frame through the on-device firewall. This returns true if the * packet is successfully sent, false otherwise. + * + * This should only be called from the TCP/IP compartment. */ bool __cheri_compartment("Firewall") ethernet_send_frame(uint8_t *packet, size_t length); @@ -20,6 +22,8 @@ bool __cheri_compartment("Firewall") * This returns true if the driver is successfully started, false otherwise. * This should fail only if the driver is already initialised (outside of a * reset), or if `state` is invalid. + * + * This should only be called from the TCP/IP compartment. */ bool __cheri_compartment("Firewall") ethernet_driver_start(std::atomic *state); @@ -33,9 +37,22 @@ bool __cheri_compartment("Firewall") */ static constexpr uint32_t RestartStateDriverKickedBit = 0x4; +/** + * Each new TCP connection to a local server port causes the creation of a + * firewall entry. To prevent this mechanism from being abused by remote + * attackers to DoS the system (the creation of firewall hole may cause an + * allocation to enlarge the table), the maximum number of concurrent TCP + * connections is limited to `MaxClientCount`. + * + * When this maximum is reached, new incoming TCP connections are dropped. + */ +static constexpr const uint8_t FirewallMaximumNumberOfClients = 6; + /** * Query the link status of the Firewall driver. This returns true if the link * is up, false otherwise. + * + * This should only be called from the TCP/IP compartment. */ bool __cheri_compartment("Firewall") ethernet_link_is_up(); @@ -48,6 +65,8 @@ bool __cheri_compartment("TCPIP") /** * Set the IP address of the DNS server to use. Packets to and from this * address will be permitted by the firewall while DNS queries are in progress. + * + * This should only be called from the TCP/IP compartment. */ void __cheri_compartment("Firewall") firewall_dns_server_ip_set(uint32_t ip); @@ -87,33 +106,82 @@ void __cheri_compartment("Firewall") /** * Close a hole in the firewall for TCP packets to and from the given endpoint. * + * `localPort` must not be a server port, as server port can be connected to + * many remote endpoints. + * * This is called from the TCP/IP compartment when a TCP connection is closed. * This is not a security risk, the worst that the TCP/IP compartment can do by * calling it is DoS itself. There is limited risk that it would fail to call * it when a connection should be closed. */ void __cheri_compartment("Firewall") - firewall_remove_tcpipv4_endpoint(uint16_t localPort); + firewall_remove_tcpipv4_local_endpoint(uint16_t localPort); + +/** + * Remove a specific remote TCP endpoint from the firewall. + * + * This is called from the TCP/IP compartment when a TCP connection is closed + * (see discussion in `firewall_remove_tcpipv4_local_endpoint`). + */ +void __cheri_compartment("Firewall") + firewall_remove_tcpipv4_remote_endpoint(uint32_t remoteAddress, + uint16_t localPort, + uint16_t remotePort); /** * Close a hole in the firewall for UDP packets to and from the given endpoint. * - * This is called from the TCP/IP compartment when a TCP connection is closed. - * This is not a security risk, the worst that the TCP/IP compartment can do by - * calling it is DoS itself. There is limited risk that it would fail to call - * it when a connection should be closed. + * This is called from the TCP/IP compartment when a UDP "connection" is + * closed. This is not a security risk, the worst that the TCP/IP compartment + * can do by calling it is DoS itself. There is limited risk that it would + * fail to call it when a connection should be closed. */ void __cheri_compartment("Firewall") firewall_remove_udpipv4_local_endpoint(uint16_t endpoint); /** * Remove a specific remote UDP endpoint from the firewall. + * + * This is called from the TCP/IP compartment when a UDP "connection" is closed + * (see discussion in `firewall_remove_udpipv4_local_endpoint`). */ void __cheri_compartment("Firewall") firewall_remove_udpipv4_remote_endpoint(uint32_t remoteAddress, uint16_t localPort, uint16_t remotePort); +/** + * Register a local TCP port as server port into the firewall. + * + * Any new incoming TCP connection to that port will trigger the creation of a + * hole in the firewall for TCP packets from that endpoint and port to the + * local TCP server port. + * + * New TCP client connections are identified by incoming TCP SYN packets. + * + * To prevent this mechanism from being used as a vector for DoS (since the + * creation of firewall hole may cause an allocation to enlarge the table), the + * maximum number of concurrent TCP connections is limited (see + * `MaxClientCount` in `firewall.cc`). Past that point, new incoming TCP + * connections will be dropped until a slot is freed (through a connection + * being terminated). + * + * This should be called only by the NetAPI compartment. + */ +void __cheri_compartment("Firewall") + firewall_add_tcpipv4_server_port(uint16_t localPort); + +/** + * Remove a server port from the firewall. + * + * This is called from the TCP/IP compartment when a TCP connection is closed. + * Similarly to `firewall_remove_tcpipv4_local_endpoint`, this is not a + * security risk: the worst that the TCP/IP compartment can do by calling it is + * DoS itself. + */ +void __cheri_compartment("Firewall") + firewall_remove_tcpipv4_server_port(uint16_t localPort); + #if CHERIOT_RTOS_OPTION_IPv6 /** * Open a hole in the firewall for TCP packets to and from the given endpoint. @@ -142,32 +210,70 @@ void __cheri_compartment("Firewall") /** * Close a hole in the firewall for TCP packets to and from the given endpoint. * + * `localPort` must not be a server port, as server port can be connected to + * many remote endpoints. + * * This is called from the TCP/IP compartment when a TCP connection is closed. * This is not a security risk, the worst that the TCP/IP compartment can do by * calling it is DoS itself. There is limited risk that it would fail to call * it when a connection should be closed. */ void __cheri_compartment("Firewall") - firewall_remove_tcpipv6_endpoint(uint16_t localPort); + firewall_remove_tcpipv6_local_endpoint(uint16_t localPort); + +/** + * Remove a specific remote TCP endpoint from the firewall. + * + * This is called from the TCP/IP compartment when a TCP connection is closed + * (see discussion in `firewall_remove_tcpipv6_local_endpoint`). + */ +void __cheri_compartment("Firewall") + firewall_remove_tcpipv6_remote_endpoint(uint8_t *remoteAddress, + uint16_t localPort, + uint16_t remotePort); /** * Close a hole in the firewall for UDP packets to and from the given endpoint. * - * This is called from the TCP/IP compartment when a TCP connection is closed. - * This is not a security risk, the worst that the TCP/IP compartment can do by - * calling it is DoS itself. There is limited risk that it would fail to call - * it when a connection should be closed. + * This is called from the TCP/IP compartment when a UDP "connection" is + * closed. This is not a security risk, the worst that the TCP/IP compartment + * can do by calling it is DoS itself. There is limited risk that it would + * fail to call it when a connection should be closed. */ void __cheri_compartment("Firewall") firewall_remove_udpipv6_local_endpoint(uint16_t endpoint); /** * Remove a specific remote UDP endpoint from the firewall. + * + * This is called from the TCP/IP compartment when a UDP "connection" is closed + * (see discussion in `firewall_remove_udpipv6_local_endpoint`). */ void __cheri_compartment("Firewall") firewall_remove_udpipv6_remote_endpoint(uint8_t *remoteAddress, uint16_t localPort, uint16_t remotePort); + +/** + * Register a local TCP port as server port. + * + * Any new incoming TCP connection to that port will trigger the creation of a + * hole in the firewall for TCP packets from that endpoint and port to the + * local TCP server port. + * + * See `firewall_add_tcpipv4_server_port` for more information. + * + * This should be called only by the NetAPI compartment. + */ +void __cheri_compartment("Firewall") + firewall_add_tcpipv6_server_port(uint16_t localPort); + +/** + * Remove a server port from the firewall. + */ +void __cheri_compartment("Firewall") + firewall_remove_tcpipv6_server_port(uint16_t localPort); + #else __always_inline static inline void firewall_add_tcpipv6_endpoint(uint8_t *remoteAddress, @@ -187,7 +293,15 @@ firewall_add_udpipv6_endpoint(uint8_t *remoteAddress, } __always_inline static inline void -firewall_remove_tcpipv6_endpoint(uint16_t localPort) +firewall_remove_tcpipv6_local_endpoint(uint16_t localPort) +{ + Debug::Assert( + false, "{} not supported with IPv6 disabled", __PRETTY_FUNCTION__); +} +__always_inline static inline void +firewall_remove_tcpipv6_remote_endpoint(uint8_t *remoteAddress, + uint16_t localPort, + uint16_t remotePort) { Debug::Assert( false, "{} not supported with IPv6 disabled", __PRETTY_FUNCTION__); @@ -207,6 +321,20 @@ firewall_remove_udpipv6_remote_endpoint(uint8_t *remoteAddress, Debug::Assert( false, "{} not supported with IPv6 disabled", __PRETTY_FUNCTION__); } + +__always_inline static inline void +firewall_add_tcpipv6_server_port(uint16_t localPort) +{ + Debug::Assert( + false, "{} not supported with IPv6 disabled", __PRETTY_FUNCTION__); +} + +__always_inline static inline void +firewall_remove_tcpipv6_server_port(uint16_t localPort) +{ + Debug::Assert( + false, "{} not supported with IPv6 disabled", __PRETTY_FUNCTION__); +} #endif /** diff --git a/lib/netapi/NetAPI.cc b/lib/netapi/NetAPI.cc index a42a270..974274d 100644 --- a/lib/netapi/NetAPI.cc +++ b/lib/netapi/NetAPI.cc @@ -120,11 +120,11 @@ SObj network_socket_connect_tcp(Timeout *timeout, timeout->elapse(t.elapsed); if (isIPv6) { - firewall_remove_tcpipv6_endpoint(ntohs(host->port)); + firewall_remove_tcpipv6_local_endpoint(ntohs(host->port)); } else { - firewall_remove_tcpipv4_endpoint(ntohs(host->port)); + firewall_remove_tcpipv4_local_endpoint(ntohs(host->port)); } sealedSocket = nullptr; } diff --git a/lib/tcpip/network_wrapper.cc b/lib/tcpip/network_wrapper.cc index 6762014..effc4d5 100644 --- a/lib/tcpip/network_wrapper.cc +++ b/lib/tcpip/network_wrapper.cc @@ -692,7 +692,7 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket) { if (isTCP) { - firewall_remove_tcpipv6_endpoint(localPort); + firewall_remove_tcpipv6_local_endpoint(localPort); } else { @@ -703,7 +703,7 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket) { if (isTCP) { - firewall_remove_tcpipv4_endpoint(localPort); + firewall_remove_tcpipv4_local_endpoint(localPort); } else { From b9867b582efb1636e900139f232285b35b438d36 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Fri, 4 Oct 2024 22:39:02 -0700 Subject: [PATCH 3/9] Don't (re-)close the firewall hole in `network_socket_close` after a reset. The reset process flushes all firewall tables, so no need to close the firewall holes again when calling `network_socket_close` on a socket from an older instance of the stack. Signed-off-by: Hugo Lefeuvre --- lib/tcpip/network_wrapper.cc | 55 ++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/lib/tcpip/network_wrapper.cc b/lib/tcpip/network_wrapper.cc index effc4d5..8fd86f6 100644 --- a/lib/tcpip/network_wrapper.cc +++ b/lib/tcpip/network_wrapper.cc @@ -676,38 +676,43 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket) return -EINVAL; } bool isTCP = socket->socket->ucProtocol == FREERTOS_IPPROTO_TCP; - // Shut down the socket before closing the firewall. - // Don't bother with the return value: - // `FreeRTOS_shutdown` fails mainly if the TCP - // connection is dead, which is likely to happen in - // practice and has no impact for us. Don't call - // `FreeRTOS_shutdown` if the socket is coming from a - // previous instance of the network stack. + // Shut down the socket and close the firewall. + // + // Don't call `FreeRTOS_shutdown` if the socket is + // coming from a previous instance of the network + // stack (the socket is invalid anyways). Don't close + // the firewall either as this was already done + // during the reset. if (socket->socketEpoch == currentSocketEpoch.load()) { + // Do not bother with the return value: + // `FreeRTOS_shutdown` fails if the TCP + // connection is dead, which is likely to + // happen in practice and has no impact here. FreeRTOS_shutdown(socket->socket, FREERTOS_SHUT_RDWR); - } - auto localPort = ntohs(socket->socket->usLocalPort); - if (socket->socket->bits.bIsIPv6) - { - if (isTCP) - { - firewall_remove_tcpipv6_local_endpoint(localPort); - } - else - { - firewall_remove_udpipv6_local_endpoint(localPort); - } - } - else - { - if (isTCP) + + auto localPort = ntohs(socket->socket->usLocalPort); + if (socket->socket->bits.bIsIPv6) { - firewall_remove_tcpipv4_local_endpoint(localPort); + if (isTCP) + { + firewall_remove_tcpipv6_local_endpoint(localPort); + } + else + { + firewall_remove_udpipv6_local_endpoint(localPort); + } } else { - firewall_remove_udpipv4_local_endpoint(localPort); + if (isTCP) + { + firewall_remove_tcpipv4_local_endpoint(localPort); + } + else + { + firewall_remove_udpipv4_local_endpoint(localPort); + } } } int ret = 0; From 20d4154bb9e660030872a4986c60a258248d0d90 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Mon, 16 Sep 2024 15:26:53 -0700 Subject: [PATCH 4/9] Implement the new network stack server API. The new server API allows application-level code to create listening TCP sockets bound to given server ports, and accept connections on a listening socket. The permission to open a given server port is encoded in bind capabilities: capabilities sealed with a NetAPI-owned key that permit the opening of a TCP server port on either IPv4 or IPv6, and enforce a limit on the maximum number of concurrent TCP connections that may be opened on the server port. Important implementation point here: we use socket callbacks to handle the situation where a three-way TCP handshake initiated by a peer on a listening socket fails. In this case the firewall hole will be closed by the socket disconnection callback and not by a call to `FreeRTOS_closesocket`. See comment in `network_wrapper.cc` for more details. This behavior can easily be tested by sending a lone TCP SYN, e.g., through: hping3 -c 1 -S -p $port $ip Signed-off-by: Hugo Lefeuvre --- include/NetAPI.h | 90 +++++++++++ lib/netapi/NetAPI.cc | 65 ++++++++ lib/tcpip/FreeRTOSIPConfig.h | 6 + lib/tcpip/network-internal.h | 17 ++- lib/tcpip/network_wrapper.cc | 287 ++++++++++++++++++++++++++++++++++- 5 files changed, 457 insertions(+), 8 deletions(-) diff --git a/include/NetAPI.h b/include/NetAPI.h index 2c084a0..8fc5d80 100644 --- a/include/NetAPI.h +++ b/include/NetAPI.h @@ -60,6 +60,43 @@ SObj __cheri_compartment("NetAPI") SObj mallocCapability, SObj hostCapability); +/** + * Create a listening TCP socket bound to a given port. + * + * The `mallocCapability` argument is used to allocate memory for the socket + * and must have sufficient quota remaining for the socket. + * + * The `bindCapability` argument is a capability authorising the bind to a + * specific server port. + * + * This returns a valid sealed capability to a socket on success, or a null on + * failure. + */ +SObj __cheri_compartment("NetAPI") + network_socket_listen_tcp(Timeout *timeout, + SObj mallocCapability, + SObj bindCapability); + +/** + * Accept a connection on a listening socket. + * + * This function will block until a connection is established or the timeout is + * reached. + * + * The `address` and `port` arguments are used to return the address and port + * of the connected client. These can be null if the caller is not interested + * in the client's address or port. + * + * This returns a valid sealed capability to a connected socket on success, or + * a null on failure. + */ +SObj __cheri_compartment("TCPIP") + network_socket_accept_tcp(Timeout *timeout, + SObj mallocCapability, + SObj listeningSocket, + NetworkAddress *address, + uint16_t *port); + /** * Create a bound UDP socket, allocated from the quota associated with * `mallocCapability`. This will use IPv4 if `isIPv6` is false, or IPv6 if it @@ -288,6 +325,40 @@ struct ConnectionCapability char hostname[]; }; +/** + * Bind capability contents. Instances of this sealed with the NetworkBindKey + * sealing capability authorise binding to a specific server port. + * + * Similarly to `ConnectionCapability`, this is probably to inflexible for the + * general case. For example: with IPv6 there are always multiple interfaces + * (link-local and routable), and it might be necessary to expose this to allow + * API users to specify which interface they want to bind on. + */ +struct BindCapability +{ + /** + * Allow to bind on an IPv6 or IPv4 interface. + * + * Note that the "or" is exclusive here: setting `isIPv6` to `true` + * will only allow binding onto an IPv6 interface. To allow both IPv4 + * and IPv6, two bind capabilities must be created. + */ + bool isIPv6; + /** + * The server port this bind capability allows to bind to. This is + * provided in host byte order. + */ + uint16_t port; + /** + * Maximum number of concurrent TCP connections allowed on this server + * port. Once this number is reached, further connections to the server + * port will be denied. If this number is larger than supported by the + * network stack, the network stack will default to its own maximum. + * A value of 0 indicates unlimited. + */ + uint16_t maximumNumberOfConcurrentTCPConnections; +}; + /** * Define a capability that authorises connecting to a specific host and port * with UDP or TCP. @@ -308,3 +379,22 @@ struct ConnectionCapability portNumber, \ sizeof(authorisedHost), \ authorisedHost) + +/** + * Define a capability that authorises binding to a specific server port with + * TCP. Binding to a server port with UDP is not supported. + */ +#define DECLARE_AND_DEFINE_BIND_CAPABILITY( \ + name, isIPv6Binding, portNumber, maxConnections) \ + DECLARE_AND_DEFINE_STATIC_SEALED_VALUE( \ + struct { \ + bool isIPv6; \ + uint16_t port; \ + uint16_t maximumNumberOfConcurrentTCPConnections; \ + }, \ + NetAPI, \ + NetworkBindKey, \ + name, \ + isIPv6Binding, \ + portNumber, \ + maxConnections) diff --git a/lib/netapi/NetAPI.cc b/lib/netapi/NetAPI.cc index 974274d..d45feba 100644 --- a/lib/netapi/NetAPI.cc +++ b/lib/netapi/NetAPI.cc @@ -36,6 +36,14 @@ namespace { return STATIC_SEALING_TYPE(NetworkConnectionKey); } + + /** + * Returns the sealing key used for a server port. + */ + __always_inline SKey bind_capability_key() + { + return STATIC_SEALING_TYPE(NetworkBindKey); + } } // namespace SObj network_socket_connect_tcp(Timeout *timeout, @@ -131,6 +139,63 @@ SObj network_socket_connect_tcp(Timeout *timeout, return sealedSocket; } +SObj network_socket_listen_tcp(Timeout *timeout, + SObj mallocCapability, + SObj bindCapability) +{ + if (!check_timeout_pointer(timeout)) + { + return nullptr; + } + Sealed sealedBindCapability{bindCapability}; + auto *serverPort = + token_unseal(bind_capability_key(), sealedBindCapability); + if (serverPort == nullptr) + { + Debug::log("Failed to unseal bind capability"); + return nullptr; + } + + if constexpr (!UseIPv6) + { + if (serverPort->isIPv6) + { + Debug::log("IPv6 is not supported"); + return nullptr; + } + } + + // Create a standard TCP socket in listening mode. + CHERI::Capability sealedSocket = network_socket_create_and_bind( + timeout, + mallocCapability, + serverPort->isIPv6, + ConnectionTypeTCP, + serverPort->port, + true /* listening mode */, + serverPort->maximumNumberOfConcurrentTCPConnections); + if (!sealedSocket.is_valid()) + { + Debug::log("Failed to create socket"); + return nullptr; + } + + // Register the server port. + if (serverPort->isIPv6) + { + if constexpr (UseIPv6) + { + firewall_add_tcpipv6_server_port(ntohs(serverPort->port)); + } + } + else + { + firewall_add_tcpipv4_server_port(ntohs(serverPort->port)); + } + + return sealedSocket; +} + NetworkAddress network_socket_udp_authorise_host(Timeout *timeout, SObj socket, SObj hostCapability) diff --git a/lib/tcpip/FreeRTOSIPConfig.h b/lib/tcpip/FreeRTOSIPConfig.h index 29ce9b3..112a352 100644 --- a/lib/tcpip/FreeRTOSIPConfig.h +++ b/lib/tcpip/FreeRTOSIPConfig.h @@ -17,6 +17,12 @@ */ #include +/** + * Enable socket callbacks. We use FREERTOS_SO_TCP_CONN_HANDLER, which notifies + * us when a TCP connection is terminated (see `network_wrapper.cc`). + */ +#define ipconfigUSE_CALLBACKS 1 + /** * `INCLUDE_*` macros have no effect here since we do not use the FreeRTOS * core, however FreeRTOS+TCP refuses to compile without defining these. diff --git a/lib/tcpip/network-internal.h b/lib/tcpip/network-internal.h index a3cda9d..6291cee 100644 --- a/lib/tcpip/network-internal.h +++ b/lib/tcpip/network-internal.h @@ -31,6 +31,19 @@ __cheri_compartment("TCPIP") int network_host_resolve( * Create a socket and bind it to the given address. The socket will be * allocated with the malloc capability. * + * The socket will be bound to any passed non-zero `localPort`. Otherwise, a + * random local port will be selected. + * + * If `isListening` is set, the socket will be marked as a passive socket which + * can be used to accept incoming connections (see + * `network_socket_accept_tcp`). + * + * If `isListening` and `maxConnections` are set, `maxConnections` limits the + * maximum number of concurrent TCP connections allowed on the server port. + * Once this number is reached, further connections to the server port will be + * denied. If this number is larger than supported by the network stack, the + * network stack will default to its own maximum. + * * This returns a sealed capability to a socket on success, or null on failure. * * This should be called only from the NetAPI compartment. @@ -40,7 +53,9 @@ SObj __cheri_compartment("TCPIP") SObj mallocCapability, bool isIPv6, ConnectionType type, - uint16_t localPort = 0); + uint16_t localPort = 0, + bool isListening = false, + uint16_t maxConnections = 0); /** * Connect a TCP socket to the given address. diff --git a/lib/tcpip/network_wrapper.cc b/lib/tcpip/network_wrapper.cc index 8fd86f6..3e06ba8 100644 --- a/lib/tcpip/network_wrapper.cc +++ b/lib/tcpip/network_wrapper.cc @@ -22,7 +22,14 @@ using Debug = ConditionalDebug; #include "../firewall/firewall.hh" +/** + * Statically match constants from the firewall which must stay in sync with + * the TCP/IP stack. + */ +static_assert(FirewallMaximumNumberOfClients == + MAX_SIMULTANEOUS_TCP_CONNECTIONS); static_assert(RestartStateDriverKickedBit == DriverKicked); + constexpr bool UseIPv6 = CHERIOT_RTOS_OPTION_IPv6; // IP thread global lock. See comment in `FreeRTOS_IP_wrapper.c`. @@ -484,11 +491,76 @@ int network_host_resolve(const char *hostname, -1 /* invalid if we are restarting */); } +/** + * Callback called by FreeRTOS+TCP when a TCP connection is created or + * terminated. + * + * We use this callback to handle the case where a three-way TCP handshake + * initiated by a peer on a listening socket fails. + * + * Rationale: + * The firewall automatically opens a hole as soon as it sees a SYN sent by a + * new peer to a server port. This hole is supposed to be closed by + * `FreeRTOS_closesocket` when the connection is terminated. However, if the + * three-way handshake does not succeed (e.g., the peer only sends a SYN), the + * TCP connection will not be created and thus no socket will be created. This + * means that `FreeRTOS_closesocket` will never be called. In that case we must + * close the firewall hole ourselves. Fortunately, this callback is called when + * an attempted three-way-handshake failed. We can use it to tell the firewall + * to remove the hole. + * + * It is also worth noting that this will never be called if *we* close the + * socket through `FreeRTOS_closesocket`. + * + * This is because FreeRTOS+TCP does not do a three-way handshake close when + * the socket is closed through `FreeRTOS_socketclose()`. Instead, FreeRTOS+TCP + * sends a FIN to the peer and destroys the socket immediately. Any further + * packet from the peer is answered with a RST (as a spurious packet). Because + * the socket is destroyed in a state when the TCP connection has not been + * properly closed (it is just in the "FIN sent" stage, i.e., FIN Wait-1), the + * callback is not called. + */ +static void on_tcp_connect(Socket_t socket, BaseType_t isConnected) +{ + if (!isConnected) + { + /** + * A TCP connection was closed. Close the corresponding + * firewall hole. + * + * Note that this is called from the TCP/IP thread, i.e., it + * cannot possibly race with a free of the `socket`, even if + * `network_socket_close` is called concurrently. + */ + struct freertos_sockaddr address = {0}; + FreeRTOS_GetRemoteAddress(socket, &address); + Debug::log("A connection was closed on port {} from remote port {}", + static_cast(socket->usLocalPort), + static_cast(ntohs(address.sin_port))); + auto localPort = htons(socket->usLocalPort); + if (socket->bits.bIsIPv6) + { + firewall_remove_tcpipv6_remote_endpoint( + address.sin_address.xIP_IPv6.ucBytes, + localPort, + address.sin_port); + } + else + { + firewall_remove_tcpipv4_remote_endpoint( + address.sin_address.ulIP_IPv4, localPort, address.sin_port); + } + } +} +F_TCP_UDP_Handler_t onTCPConnectCallback = {on_tcp_connect}; + SObj network_socket_create_and_bind(Timeout *timeout, SObj mallocCapability, bool isIPv6, ConnectionType type, - uint16_t localPort) + uint16_t localPort, + bool isListening, + uint16_t maxConnections) { return with_restarting_checks( [&]() -> SObj { @@ -574,6 +646,42 @@ SObj network_socket_create_and_bind(Timeout *timeout, mallocCapability, socket_key(), sealedSocket); return nullptr; } + + if (isListening) + { + if (maxConnections != 0) + { + maxConnections = std::min( + maxConnections, MAX_SIMULTANEOUS_TCP_CONNECTIONS); + } + else + { + maxConnections = MAX_SIMULTANEOUS_TCP_CONNECTIONS; + } + auto listenResult = FreeRTOS_listen(socket, maxConnections); + if (listenResult != 0) + { + // See above comments. Note that this + // failure case is not supposed to + // happen since we know that the + // socket is valid and bound. + Debug::log("Failed to set socket into listen mode."); + // No need to acquire the lock here + // since we still have it. + ds::linked_list::remove(&(socketWrapper->ring)); + close_socket_retry(timeout, socket); + token_obj_destroy( + mallocCapability, socket_key(), sealedSocket); + return nullptr; + } + + FreeRTOS_setsockopt( + socket, + 0, + FREERTOS_SO_TCP_CONN_HANDLER, + static_cast(&onTCPConnectCallback), + sizeof(onTCPConnectCallback)); + } } else { @@ -590,6 +698,128 @@ SObj network_socket_create_and_bind(Timeout *timeout, static_cast(nullptr) /* return nullptr if we are restarting */); } +SObj network_socket_accept_tcp(Timeout *timeout, + SObj mallocCapability, + SObj sealedListeningSocket, + NetworkAddress *address, + uint16_t *port) +{ + SObj socket = nullptr; + with_sealed_socket( + [&](SealedSocket *listeningSocket) { + if (!check_timeout_pointer(timeout)) + { + return -EINVAL; + } + + // Create a sealed socket wrapper for the accepted connection. + auto [socketWrapper, sealedSocket] = token_allocate( + timeout, mallocCapability, socket_key()); + if (socketWrapper == nullptr) + { + Debug::log("Failed to allocate socket wrapper."); + return -EINVAL; + } + + socketWrapper->socketEpoch = currentSocketEpoch.load(); + + struct freertos_sockaddr addressTmp; + uint32_t addressLength = sizeof(addressTmp); + auto rawSocket = FreeRTOS_accept( + listeningSocket->socket, &addressTmp, &addressLength); + if (rawSocket == nullptr) + { + Debug::log("Failed to create socket."); + // This cannot fail unless buggy - we know that we + // successfully allocated the token with this malloc + // capability. Same for other calls to `token_obj_destroy` + // in this function. + token_obj_destroy(mallocCapability, socket_key(), sealedSocket); + return -EINVAL; + } + socketWrapper->socket = rawSocket; + + // Claim the socket so that it counts towards the caller's quota. The + // network stack also keeps a claim to it. We will drop this claim on + // deallocation. + Claim c{mallocCapability, rawSocket}; + if (!c) + { + Debug::log("Failed to claim socket."); + // Note that `close_socket_retry` can fail, in which case we + // will leak the socket allocation. There is nothing we can do + // here. Returning the half-baked sealed socket would be + // dangerous because the close() path isn't designed to handle + // it (and changing it to do so is non-trivial and likely not + // worth the trouble). + close_socket_retry(timeout, rawSocket); + // Cannot fail, see above. + token_obj_destroy(mallocCapability, socket_key(), sealedSocket); + return -EINVAL; + } + + if (LockGuard g{sealedSocketsListLock, timeout}) + { + // Add the socket to the sealed socket reset list. + socketWrapper->ring.cell_reset(); + sealedSockets.append(&(socketWrapper->ring)); + } + else + { + // See above comments. + Debug::log("Failed to add socket to the socket reset list."); + close_socket_retry(timeout, rawSocket); + token_obj_destroy(mallocCapability, socket_key(), sealedSocket); + return -EINVAL; + } + + // Set `address`. + if ((heap_claim_fast(timeout, address) < 0) || + (!check_pointer(address))) + { + // There is not much we can do if this happens. The + // caller passed an invalid `address` or freed it + // concurrently: we can simply ignore, they are + // shooting themselves in the foot by making + // `address` unusable. Returning nullptr at that + // stage would force us to close the socket, destroy + // the token, etc. + Debug::log("Invalid address pointer"); + } + else + { + if (addressTmp.sin_family == FREERTOS_AF_INET6) + { + address->kind = NetworkAddress::AddressKindIPv6; + memcpy( + address->ipv6, addressTmp.sin_address.xIP_IPv6.ucBytes, 16); + } + else + { + address->kind = NetworkAddress::AddressKindIPv4; + address->ipv4 = addressTmp.sin_address.ulIP_IPv4; + } + } + // Set `port`. + if ((heap_claim_fast(timeout, port) < 0) || + (!check_pointer(port))) + { + // Same comment as earlier for `address`. + Debug::log("Invalid port pointer"); + } + else + { + *port = ntohs(addressTmp.sin_port); + } + + c.release(); + socket = sealedSocket; + return 0; + }, + sealedListeningSocket); + return socket; +} + int network_socket_connect_tcp_internal(Timeout *timeout, SObj socket, NetworkAddress address, @@ -685,18 +915,44 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket) // during the reset. if (socket->socketEpoch == currentSocketEpoch.load()) { + auto rawSocket = socket->socket; + // Do not bother with the return value: // `FreeRTOS_shutdown` fails if the TCP // connection is dead, which is likely to // happen in practice and has no impact here. - FreeRTOS_shutdown(socket->socket, FREERTOS_SHUT_RDWR); + FreeRTOS_shutdown(rawSocket, FREERTOS_SHUT_RDWR); - auto localPort = ntohs(socket->socket->usLocalPort); - if (socket->socket->bits.bIsIPv6) + auto localPort = ntohs(rawSocket->usLocalPort); + if (rawSocket->bits.bIsIPv6) { if (isTCP) { - firewall_remove_tcpipv6_local_endpoint(localPort); + // This only fails if the + // socket is not a TCP + // socket, which shouldn't + // happen here. + struct freertos_sockaddr address = {0}; + FreeRTOS_GetRemoteAddress(rawSocket, &address); + + // If the socket is in + // listening mode, remove the + // associated server port + // from the firewall. + // Otherwise close the + // corresponding firewall + // hole. + if (rawSocket->u.xTCP.eTCPState == eTCP_LISTEN) + { + firewall_remove_tcpipv6_server_port(localPort); + } + else + { + firewall_remove_tcpipv6_remote_endpoint( + address.sin_address.xIP_IPv6.ucBytes, + localPort, + address.sin_port); + } } else { @@ -707,7 +963,24 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket) { if (isTCP) { - firewall_remove_tcpipv4_local_endpoint(localPort); + // This only fails if the + // socket is not a TCP + // socket, which shouldn't + // happen here. + struct freertos_sockaddr address = {0}; + FreeRTOS_GetRemoteAddress(socket->socket, &address); + + if (rawSocket->u.xTCP.eTCPState == eTCP_LISTEN) + { + firewall_remove_tcpipv4_server_port(localPort); + } + else + { + firewall_remove_tcpipv4_remote_endpoint( + address.sin_address.ulIP_IPv4, + localPort, + address.sin_port); + } } else { @@ -851,7 +1124,7 @@ NetworkReceiveResult network_socket_receive_from(Timeout *timeout, { return -EPERM; } - *port = FreeRTOS_ntohs(remoteAddress.sin_port); + *port = ntohs(remoteAddress.sin_port); } buffer = claimedBuffer; From 98ffaa74ca7581f0c8d453db37e9fa38e5276e17 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Fri, 18 Oct 2024 13:58:22 -0700 Subject: [PATCH 5/9] Update NetAPI documentation to return untagged values on error. The NetAPI currently uses null as common error value for pointer types. This is problematic as it does not cover "compartment-wide" types of failure, such as `-ECOMPARTMENTFAIL` or `-ENOTENOUGHSTACK`. Address this by documenting error values for pointer types as untagged values instead of null. This does not require changes in the NetAPI implementation, as null is itself untagged. Examples do not currently check error values so do not need updating either. (Note: maybe they should to give an example of proper use of the API...) Signed-off-by: Hugo Lefeuvre --- include/NetAPI.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/include/NetAPI.h b/include/NetAPI.h index 8fc5d80..a6b081b 100644 --- a/include/NetAPI.h +++ b/include/NetAPI.h @@ -52,8 +52,8 @@ void __cheri_compartment("TCPIP") network_start(void); * The `hostCapability` argument is a capability authorising the connection to * a specific host. * - * This returns a valid sealed capability to a socket on success, or a null on - * failure. + * This returns a valid sealed capability to a socket on success, or an + * untagged value on failure. */ SObj __cheri_compartment("NetAPI") network_socket_connect_tcp(Timeout *timeout, @@ -69,8 +69,8 @@ SObj __cheri_compartment("NetAPI") * The `bindCapability` argument is a capability authorising the bind to a * specific server port. * - * This returns a valid sealed capability to a socket on success, or a null on - * failure. + * This returns a valid sealed capability to a socket on success, or an + * untagged value on failure. */ SObj __cheri_compartment("NetAPI") network_socket_listen_tcp(Timeout *timeout, @@ -88,7 +88,7 @@ SObj __cheri_compartment("NetAPI") * in the client's address or port. * * This returns a valid sealed capability to a connected socket on success, or - * a null on failure. + * an untagged value on failure. */ SObj __cheri_compartment("TCPIP") network_socket_accept_tcp(Timeout *timeout, @@ -107,8 +107,8 @@ SObj __cheri_compartment("TCPIP") * Instead, each remote host must be separately authorised with * `network_socket_udp_authorise_host`. * - * Returns a valid sealed capability to a socket on success, or a null on - * failure. + * This returns a valid sealed capability to a socket on success, or an + * untagged value on failure. */ SObj __cheri_compartment("TCPIP") network_socket_udp(Timeout *timeout, SObj mallocCapability, bool isIPv6); @@ -175,7 +175,8 @@ struct NetworkReceiveResult * freeing this buffer. * * The `bytesReceived` field of the result will be negative if an error - * occurred. The `buffer` field will be null if no data were received. + * occurred. The `buffer` field will be an untagged value if no data were + * received. * * The negative values will be errno values: * @@ -222,7 +223,8 @@ int __cheri_compartment("TCPIP") * sender's address or port. * * The `bytesReceived` field of the result will be negative if an error - * occurred. The `buffer` field will be null if no data were received. + * occurred. The `buffer` field will be an untagged value if no data were + * received. * * Note that errors occuring in this function (particularly timeout and invalid * `address` or `port` pointers) may cause UDP packets to be dropped. @@ -280,8 +282,8 @@ enum ConnectionType : uint8_t }; /** - * Returns the host name embedded in a host capability or null if this is not a - * valid host capability. + * Returns the host name embedded in a host capability or an untagged value if + * this is not a valid host capability. */ const char *__cheri_compartment("NetAPI") network_host_get(SObj hostCapability); From de2deec411a544dc11bb7c0edfb265e83053f5c0 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Fri, 18 Oct 2024 17:33:41 -0700 Subject: [PATCH 6/9] Update the network_stack.rego to match changes. This commit updates the auditing policy of the network stack to include the new firewall APIs and the new bind capabilities. This commit also fixes some pre-existing issues: - checks were missing from some previous changes (`ethernet_link_is_up`, `firewall_mac_address_get`, among others) - the SNTP-related rules always fail if SNTP is not compiled in Signed-off-by: Hugo Lefeuvre --- network_stack.rego | 99 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 12 deletions(-) diff --git a/network_stack.rego b/network_stack.rego index a6b831e..7d8f033 100644 --- a/network_stack.rego +++ b/network_stack.rego @@ -29,7 +29,7 @@ decode_connection_capability(connection) = decoded { is_number(connectionType) connectionType < 2 connectionType >= 0 - # Padding byte must be zero + # Padding byte must be zero integer_from_hex_string(connection.contents, 1, 1) == 0 some hostLength @@ -55,6 +55,52 @@ all_sealed_connection_capabilities_are_valid { } } +# Evaluates to true if this is a bind capability. This does not check whether +# it is a *valid* bind capability, only that it is a sealed object of the +# correct kind. +is_bind_capability(bind) { + bind.kind == "SealedObject" + bind.sealing_type.compartment == "NetAPI" + bind.sealing_type.key == "NetworkBindKey" +} + +# Check that this is a valid bind capability and, if so, decode its contents. +# Returns an object with the connection protocol (isIPv6: false for IPv4, true +# for IPv6), the port, and the maximum number of simultaneous TCP connections +# allowed on the server port. +decode_bind_capability(bind) = decoded { + is_bind_capability(bind) + isIPv6 = integer_from_hex_string(bind.contents, 0, 1) + is_number(isIPv6) + # isIPv6 is a boolean, so within [0, 1] + isIPv6 < 2 + isIPv6 >= 0 + # Padding byte must be zero + integer_from_hex_string(bind.contents, 1, 1) == 0 + port = integer_from_hex_string(bind.contents, 2, 2) + is_number(port) + maxConnections = integer_from_hex_string(bind.contents, 4, 2) + is_number(maxConnections) + # Padding bytes must be zero + integer_from_hex_string(bind.contents, 6, 2) == 0 + + decoded = { + "isIPv6": isIPv6 == 1, + "port": port, + "maxConnections": maxConnections + } +} + +# Predicate that checks that every bind capability held by any +# compartment or library is valid. +all_sealed_bind_capabilities_are_valid { + some binds + binds = [ b | b = input.compartments[_].imports[_] ; is_bind_capability(b) ] + every b in binds { + decode_bind_capability(b) + } +} + is_firewall_thread(thread) { thread.entry_point.compartment_name == "Firewall" thread.entry_point.function == "ethernet_run_driver()" @@ -83,14 +129,38 @@ network_thread_is_valid { thread[0].trusted_stack.length >= 312 } +# Evaluates to true if this is an sntp cache. Similarly to other `is_*` +# functions, this does not check whether it is a *valid* cache, only that it is +# a shared object of the correct kind and with the right name. +is_sntp_cache(cache) { + cache.kind == "SharedObject" + cache.shared_object == "sntp_time_at_last_sync" +} + +# Check that the SNTP cache is valid. +sntp_cache_is_valid { + some caches + caches = [ c | c=input.compartments[_].imports[_] ; is_sntp_cache(c) ] + every c in caches { + # SNTP cache is the right size and is writeable only by the sntp compartment + data.compartment.shared_object_writeable_allow_list("sntp_time_at_last_sync", {"SNTP"}) + c.length = 24 + } +} + # Helper to dump all connection capabilities and the compartment that owns them all_connection_capabilities = [ { "owner": owner, "capability": decode_connection_capability(c) } | c = input.compartments[owner].imports[_] ; is_connection_capability(c) ] -# The internal configuration for the network interface is valid. The parameter -# is the name of the network device. For example, "kunyan_ethernet" on the -# Arty A7. +# Helper to dump all bind capabilities and the compartment that owns them +all_bind_capabilities = [ { "owner": owner, "capability": decode_bind_capability(c) } | c = input.compartments[owner].imports[_] ; is_bind_capability(c) ] + +# The internal configuration for the network interface is valid. +# Parameters: +# `ethernetDevice`: name of the network device. For example, +# "kunyan_ethernet" on the Arty A7. valid(ethernetDevice) { all_sealed_connection_capabilities_are_valid + all_sealed_bind_capabilities_are_valid firewall_thread_is_valid network_thread_is_valid data.compartment.compartment_call_allow_list("TCPIP", "network_host_resolve.*", {"NetAPI"}) @@ -99,26 +169,31 @@ valid(ethernetDevice) { data.compartment.compartment_call_allow_list("TCPIP", "ethernet_receive_frame.*", {"Firewall"}) data.compartment.compartment_call_allow_list("TCPIP", "ip_thread_entry.*", set()) data.compartment.compartment_call_allow_list("Firewall", "ethernet_send_frame.*", {"TCPIP"}) - data.compartment.compartment_call_allow_list("Firewall", "firewall_dns_server_ip_set.*", {"TCPIP"}) data.compartment.compartment_call_allow_list("Firewall", "ethernet_driver_start.*", {"TCPIP"}) + data.compartment.compartment_call_allow_list("Firewall", "ethernet_link_is_up.*", {"TCPIP"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_dns_server_ip_set.*", {"TCPIP"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_permit_dns.*", {"NetAPI"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_add_tcpipv4_endpoint.*", {"NetAPI"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_add_udpipv4_endpoint.*", {"NetAPI"}) - data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_tcpipv4_endpoint.*", {"NetAPI", "TCPIP"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_tcpipv4_local_endpoint.*", {"NetAPI", "TCPIP"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_tcpipv4_remote_endpoint.*", {"NetAPI", "TCPIP"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_udpipv4_local_endpoint.*", {"NetAPI", "TCPIP"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_udpipv4_remote_endpoint.*", {"NetAPI"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_add_tcpipv4_server_port.*", {"NetAPI"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_tcpipv4_server_port.*", {"NetAPI", "TCPIP"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_add_tcpipv6_endpoint.*", {"NetAPI"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_add_udpipv6_endpoint.*", {"NetAPI"}) - data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_tcpipv6_endpoint.*", {"NetAPI", "TCPIP"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_tcpipv6_local_endpoint.*", {"NetAPI", "TCPIP"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_tcpipv6_remote_endpoint.*", {"NetAPI", "TCPIP"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_udpipv6_local_endpoint.*", {"NetAPI", "TCPIP"}) data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_udpipv6_remote_endpoint.*", {"NetAPI"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_add_tcpipv6_server_port.*", {"NetAPI"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_remove_tcpipv6_server_port.*", {"NetAPI", "TCPIP"}) + data.compartment.compartment_call_allow_list("Firewall", "firewall_mac_address_get.*", {"TCPIP"}) data.compartment.compartment_call_allow_list("Firewall", "ethernet_run_driver.*", set()) data.compartment.mmio_allow_list(ethernetDevice, {"Firewall"}) - # SNTP cache is the right size and is writeable only by the sntp compartment - data.compartment.shared_object_writeable_allow_list("sntp_time_at_last_sync", {"SNTP"}) - some sntpCache - sntpCache = data.compartment.shared_object("sntp_time_at_last_sync") - sntpCache.end - sntpCache.start = 24 + + sntp_cache_is_valid } From 2583da7ffb3d038971844a1bccfc241c497a36c9 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Thu, 19 Sep 2024 19:32:12 -0700 Subject: [PATCH 7/9] Add an HTTP server example to showcase the new TCP server API. Signed-off-by: Hugo Lefeuvre --- examples/05.HTTP_SERVER/README.md | 6 + examples/05.HTTP_SERVER/http_server.cc | 218 +++++++++++++++++++++++++ examples/05.HTTP_SERVER/xmake.lua | 62 +++++++ 3 files changed, 286 insertions(+) create mode 100644 examples/05.HTTP_SERVER/README.md create mode 100644 examples/05.HTTP_SERVER/http_server.cc create mode 100644 examples/05.HTTP_SERVER/xmake.lua diff --git a/examples/05.HTTP_SERVER/README.md b/examples/05.HTTP_SERVER/README.md new file mode 100644 index 0000000..f930c99 --- /dev/null +++ b/examples/05.HTTP_SERVER/README.md @@ -0,0 +1,6 @@ +HTTP Server Example +=================== + +This example shows simple use of the TCP socket server API. +It opens server port 80 and serves a static HTTP page there. +Note that this is *not* intended as an example of how to build an HTTP server. diff --git a/examples/05.HTTP_SERVER/http_server.cc b/examples/05.HTTP_SERVER/http_server.cc new file mode 100644 index 0000000..4a084e2 --- /dev/null +++ b/examples/05.HTTP_SERVER/http_server.cc @@ -0,0 +1,218 @@ +#include "timeout.h" +#include +#include +#include +#include +#include +#include +#include +#include + +using CHERI::Capability; + +using Debug = ConditionalDebug; +constexpr bool UseIPv6 = CHERIOT_RTOS_OPTION_IPv6; + +/** + * Bind capability for the server port 80. Use IPv6 if enabled in the + * configuration, and allow at most two simultaneous connections to the server. + */ +#define LISTEN_PORT 80 +DECLARE_AND_DEFINE_BIND_CAPABILITY(HTTPPort, UseIPv6, LISTEN_PORT, 10); + +DECLARE_AND_DEFINE_ALLOCATOR_CAPABILITY(TestMalloc, 32 * 1024); +#define TEST_MALLOC STATIC_SEALED_VALUE(TestMalloc) + +static char reply[] = + "HTTP/1.1 200 OK\r\n" + "Content-type: text/html\r\n" + "Connection: close\r\n" + "\r\n" + "" + "" + "Hello from CHERIoT!" + "

It works!

Served from a CHERIoT device.

" + "\n"; + +/** + * Maximum number of clients the server will server before shutting down. This + * is useful to check that the server can handle multiple clients before + * terminating. + */ +static const uint16_t MaxClients = 10; + +/** + * This example server is written to showcase the network stack server API, but + * also the network stack restart. If one of the network stack APIs fails in a + * way likely caused by a crash, the server will wait this small delay before + * retrying to give time to the network stack to reset. + */ +static const uint16_t RestartDelay = 100; // in ticks + +void __cheri_compartment("http_server_example") example() +{ + network_start(); + + auto heapAtStart = heap_quota_remaining(TEST_MALLOC); + + uint16_t clientsCounter = 0; + + Debug::log("Starting the server."); + + while (clientsCounter < MaxClients) + { + Debug::log("Creating a listening socket."); + Timeout unlimited{UnlimitedTimeout}; + auto socket = network_socket_listen_tcp( + &unlimited, TEST_MALLOC, STATIC_SEALED_VALUE(HTTPPort)); + + if (!Capability{socket}.is_valid()) + { + Debug::log("Failed to create a listening socket."); + // This may have failed because of a network stack + // crash. Sleep a little bit to enable a reset. + Timeout sleep{RestartDelay}; + thread_sleep(&sleep); + continue; + } + + Debug::log("Listening on port {}...", LISTEN_PORT); + while (clientsCounter < MaxClients) + { + NetworkAddress clientAddress = {0}; + uint16_t clientPort = 0; + + auto clientSocket = network_socket_accept_tcp( + &unlimited, TEST_MALLOC, socket, &clientAddress, &clientPort); + + if (!Capability{clientSocket}.is_valid()) + { + Debug::log("Failed to establish a connection."); + Timeout sleep{RestartDelay}; + thread_sleep(&sleep); + break; + } + + if (!UseIPv6) + { + Debug::log("Established a connection with {}.{}.{}.{}:{}", + static_cast(clientAddress.ipv4) & 0xff, + static_cast(clientAddress.ipv4 >> 8) & 0xff, + static_cast(clientAddress.ipv4 >> 16) & 0xff, + static_cast(clientAddress.ipv4 >> 24) & 0xff, + clientPort); + } + else + { + Debug::log("Established a connection."); + } + + clientsCounter++; + + auto [received, buffer] = + network_socket_receive(&unlimited, TEST_MALLOC, clientSocket); + + // For this simple server, we do not care about what the client + // sent (we will always serve the same content). + int ret = heap_free(TEST_MALLOC, buffer); + if (ret != 0) + { + // This may happen if the network stack + // crashed: if the network stack crashes during + // `network_socket_receive` it is likely that + // `buffer` will be a nullptr or any other + // untagged value, and `heap_free` will return + // an error when passed the invalid thing. + Debug::log("Failed to free receive buffer: {}", ret); + // Do not break here - we still want to free the socket. + } + + if (received > 0) + { + Debug::log( + "Received {} bytes from the client, serving static content.", + received); + constexpr size_t ToSend = sizeof(reply) - 1; + size_t sent = 0; + while (sent < ToSend) + { + size_t remaining = ToSend - sent; + + ssize_t sentThisCall = network_socket_send( + &unlimited, clientSocket, &(reply[sent]), remaining); + Debug::log("Sent {} bytes", sentThisCall); + + if (sentThisCall >= 0) + { + sent += sentThisCall; + } + else + { + Debug::log("Send failed: {}", sentThisCall); + break; + } + } + } + else + { + Debug::log( + "Failed to receive request from the client, error {}.", + received); + } + + Debug::log("Terminating the connection with the client."); + int retries = 10; + // In a retry loop to be more rebust to network stack crashes. + for (; retries > 0; retries--) + { + if (network_socket_close( + &unlimited, TEST_MALLOC, clientSocket) == 0) + { + break; + } + Timeout sleep{RestartDelay}; + thread_sleep(&sleep); + } + + if (retries == 0) + { + Debug::log("Failed to close the client socket."); + } + } + + Debug::log("Closing the listening socket."); + int retries = 10; + // In a retry loop to be more rebust to network stack crashes. + for (; retries > 0; retries--) + { + if (network_socket_close(&unlimited, TEST_MALLOC, socket) == 0) + { + break; + } + Timeout sleep{RestartDelay}; + thread_sleep(&sleep); + } + + if (retries == 0) + { + Debug::log("Failed to close the listening socket."); + } + } + + Debug::log("Now checking for leaks."); + auto heapAtEnd = heap_quota_remaining(TEST_MALLOC); + if (heapAtEnd < heapAtStart) + { + Debug::log("Warning: The implementation leaked {} bytes (start: {} vs. " + "end: {}).", + heapAtStart - heapAtEnd, + heapAtEnd, + heapAtStart); + } + else + { + Debug::log("No leaks detected."); + } + + Debug::log("Terminating the server."); +} diff --git a/examples/05.HTTP_SERVER/xmake.lua b/examples/05.HTTP_SERVER/xmake.lua new file mode 100644 index 0000000..f2fddcb --- /dev/null +++ b/examples/05.HTTP_SERVER/xmake.lua @@ -0,0 +1,62 @@ +-- Copyright CHERIoT Contributors. +-- SPDX-License-Identifier: MIT + +-- Update this to point to the location of the CHERIoT SDK +sdkdir = path.absolute("../../../cheriot-rtos/sdk") + +set_project("CHERIoT HTTP Server Example") + +includes(sdkdir) + +set_toolchains("cheriot-clang") + +includes(path.join(sdkdir, "lib")) +includes("../../lib") + +option("board") + set_default("ibex-arty-a7-100") + +compartment("http_server_example") + set_default(false) + add_includedirs("../../include") + add_deps("freestanding", "TCPIP", "NetAPI") + add_files("http_server.cc") + on_load(function(target) + target:add('options', "IPv6") + local IPv6 = get_config("IPv6") + target:add("defines", "CHERIOT_RTOS_OPTION_IPv6=" .. tostring(IPv6)) + end) + +firmware("05.http_server_example") + set_policy("build.warning", true) + add_deps("TCPIP", "Firewall", "NetAPI", "http_server_example", "atomic8", "debug") + on_load(function(target) + target:values_set("board", "$(board)") + target:values_set("threads", { + { + compartment = "http_server_example", + priority = 1, + entry_point = "example", + stack_size = 0xe00, + trusted_stack_frames = 6 + }, + { + compartment = "TCPIP", + priority = 1, + entry_point = "ip_thread_entry", + stack_size = 0xe00, + trusted_stack_frames = 5 + }, + { + compartment = "Firewall", + -- Higher priority, this will be back-pressured by the message + -- queue if the network stack can't keep up, but we want + -- packets to arrive immediately. + priority = 2, + entry_point = "ethernet_run_driver", + stack_size = 0x1000, + trusted_stack_frames = 5 + } + }, {expand = false}) + end) + From 168ca1947e02cd5c87fcad77e57861334afad498 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Mon, 21 Oct 2024 10:25:53 -0700 Subject: [PATCH 8/9] Fix send API return value in examples 2 and 3. `network_socket_send` and `tls_connection_send` return a signed value, so the current code is incorrect. Signed-off-by: Hugo Lefeuvre --- examples/02.HTTP/http.cc | 2 +- examples/03.HTTPS/https.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/02.HTTP/http.cc b/examples/02.HTTP/http.cc index 1eaefa1..e443fee 100644 --- a/examples/02.HTTP/http.cc +++ b/examples/02.HTTP/http.cc @@ -39,7 +39,7 @@ void __cheri_compartment("http_example") example() { size_t remaining = toSend - sent; - size_t sentThisCall = + ssize_t sentThisCall = network_socket_send(&unlimited, socket, &(message[sent]), remaining); Debug::log("Sent {} bytes", sentThisCall); diff --git a/examples/03.HTTPS/https.cc b/examples/03.HTTPS/https.cc index f1d0a27..59a99ce 100644 --- a/examples/03.HTTPS/https.cc +++ b/examples/03.HTTPS/https.cc @@ -76,7 +76,7 @@ void __cheri_compartment("https_example") example() { size_t remaining = toSend - sent; - size_t sentThisCall = + ssize_t sentThisCall = tls_connection_send(&t, tlsSocket, &(message[sent]), remaining, 0); Debug::log("Sent {} bytes", sentThisCall); From ed0f11f41f07fd07f0794a47d0ef76853d17d7af Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Fri, 18 Oct 2024 13:51:37 -0700 Subject: [PATCH 9/9] Remove `set_default(false)` in example compartments. The compartments will be compiled anyways as dependencies. Signed-off-by: Hugo Lefeuvre --- examples/02.HTTP/xmake.lua | 1 - examples/03.HTTPS/xmake.lua | 1 - examples/04.MQTT/xmake.lua | 1 - examples/05.HTTP_SERVER/xmake.lua | 1 - 4 files changed, 4 deletions(-) diff --git a/examples/02.HTTP/xmake.lua b/examples/02.HTTP/xmake.lua index 8c78c2a..1263644 100644 --- a/examples/02.HTTP/xmake.lua +++ b/examples/02.HTTP/xmake.lua @@ -17,7 +17,6 @@ option("board") set_default("ibex-arty-a7-100") compartment("http_example") - set_default(false) add_includedirs("../../include") add_deps("freestanding", "TCPIP", "NetAPI") add_files("http.cc") diff --git a/examples/03.HTTPS/xmake.lua b/examples/03.HTTPS/xmake.lua index 21e6f65..533ce7d 100644 --- a/examples/03.HTTPS/xmake.lua +++ b/examples/03.HTTPS/xmake.lua @@ -17,7 +17,6 @@ option("board") set_default("ibex-arty-a7-100") compartment("https_example") - set_default(false) add_includedirs("../../include") add_deps("freestanding", "TCPIP", "NetAPI", "TLS", "Firewall", "SNTP", "time_helpers", "debug") add_files("https.cc") diff --git a/examples/04.MQTT/xmake.lua b/examples/04.MQTT/xmake.lua index 45acb51..36aed1d 100644 --- a/examples/04.MQTT/xmake.lua +++ b/examples/04.MQTT/xmake.lua @@ -17,7 +17,6 @@ option("board") set_default("ibex-arty-a7-100") compartment("mqtt_example") - set_default(false) add_includedirs("../../include") add_deps("freestanding", "TCPIP", "NetAPI", "TLS", "Firewall", "SNTP", "MQTT", "time_helpers", "debug") -- stdio only needed for debug prints in MQTT, can be removed with --debug-mqtt=n diff --git a/examples/05.HTTP_SERVER/xmake.lua b/examples/05.HTTP_SERVER/xmake.lua index f2fddcb..889c2ca 100644 --- a/examples/05.HTTP_SERVER/xmake.lua +++ b/examples/05.HTTP_SERVER/xmake.lua @@ -17,7 +17,6 @@ option("board") set_default("ibex-arty-a7-100") compartment("http_server_example") - set_default(false) add_includedirs("../../include") add_deps("freestanding", "TCPIP", "NetAPI") add_files("http_server.cc")