diff --git a/core/src/filter/actions.rs b/core/src/filter/actions.rs index 4da24b26..3888a6c9 100644 --- a/core/src/filter/actions.rs +++ b/core/src/filter/actions.rs @@ -1,23 +1,22 @@ -use bitmask_enum::bitmask; -/// For each connectionn, the Retina framework applies multiple filtering stages as -/// packets are received in order to determine (1) whether packets from that connection -/// should continue to be processed and (2) what to do with these packets. -/// -/// Each connection is associated with a set of Actions. These actions specify the -/// operations the framework will perform for the connection *now or in the future*: -/// e.g., probe for the application-layer protocol (until it is identified), deliver -/// the connection (when it has terminated), deliver all subsequent packets in the -/// connection, etc. An empty Actions struct will cause the connection to be dropped. -/// -/// Each filter stage returns a set of actions and a set of terminal actions. -/// The terminal actions are the subset of actions that are maintained through -/// the next filter stage. +//! For each connection, the Retina framework applies multiple filtering stages as +//! packets are received in order to determine (1) whether packets from that connection +//! should continue to be processed and (2) what to do with these packets. +//! +//! Each connection is associated with a set of Actions. These actions specify the +//! operations the framework will perform for the connection *now or in the future*: +//! e.g., probe for the application-layer protocol (until it is identified), deliver +//! the connection (when it has terminated), deliver all subsequent packets in the +//! connection, etc. An empty Actions struct will cause the connection to be dropped. +//! +//! Each filter stage returns a set of actions and a set of terminal actions. +//! The terminal actions are the subset of actions that are maintained through +//! the next filter stage. 
use std::fmt; +use bitmask_enum::bitmask; #[bitmask] #[bitmask_config(vec_debug)] pub enum ActionData { - // Packet actions // /// Forward new packet to connection tracker /// Should only be used in the PacketContinue filter PacketContinue, @@ -36,7 +35,6 @@ pub enum ActionData { /// datatype that requires tracking and delivering packets. PacketTrack, - // Connection/session actions // /// Probe for (identify) the application-layer protocol ProtoProbe, /// Once the application-layer protocl is identified, apply the ProtocolFilter. @@ -60,6 +58,7 @@ pub enum ActionData { ConnDeliver, } +/// Actions maintained per-connection #[derive(Debug, Clone, Hash, Eq, PartialEq)] pub struct Actions { /// All actions (terminal and non-terminal) that should @@ -69,7 +68,7 @@ pub struct Actions { /// regardless of what the next filter returns /// E.g., if a terminal match for a connection-level filter /// occurs at the packet layer, we should continue tracking - /// the connection without re-applying that filter. + /// the connection regardless of later filter results. pub terminal_actions: ActionData, } @@ -80,7 +79,7 @@ impl Default for Actions { } impl Actions { - /// Create an empty Actions bitmask + // Create an empty Actions bitmask pub fn new() -> Self { Self { data: ActionData::none(), @@ -88,38 +87,38 @@ impl Actions { } } - /// Store the result of a new filter - /// Used at runtime after application of next filter + // Store the result of a new filter + // Used at runtime after application of next filter #[inline] pub fn update(&mut self, actions: &Actions) { self.data = self.terminal_actions | actions.data; self.terminal_actions |= actions.terminal_actions; } - /// Combine terminal and non-terminal actions - /// Used for building a filter tree at compile time and when - /// applying a filter at runtime if additional conditions are met. 
+ // Combine terminal and non-terminal actions + // Used for building a filter tree at compile time and when + // applying a filter at runtime if additional conditions are met. #[inline] pub fn push(&mut self, actions: &Actions) { self.data |= actions.data; self.terminal_actions |= actions.terminal_actions; } - /// Returns true if no actions are set (i.e., the connection can - /// be dropped by the framework). + // Returns true if no actions are set (i.e., the connection can + // be dropped by the framework). #[inline] pub fn drop(&self) -> bool { self.data.is_none() && self.terminal_actions.is_none() } - /// Update `self` to contain only actions not in `actions` + // Update `self` to contain only actions not in `actions` #[inline] pub(crate) fn clear_intersection(&mut self, actions: &Actions) { self.data &= actions.data.not(); self.terminal_actions &= actions.data.not(); } - /// Conn tracker must deliver each PDU to tracked data when received + // Conn tracker must deliver each PDU to tracked data when received #[inline] pub(crate) fn update_pdu(&self) -> bool { self.data.intersects(ActionData::UpdatePDU) @@ -148,7 +147,7 @@ impl Actions { self.data.intersects(ActionData::PacketCache) } - /// True if application-layer probing or parsing should be applied + // True if application-layer probing or parsing should be applied #[inline] pub(crate) fn parse_any(&self) -> bool { self.data.intersects( @@ -167,7 +166,7 @@ impl Actions { self.data == ActionData::ConnDeliver } - /// True if the session filter should be applied + // True if the session filter should be applied #[inline] pub(crate) fn apply_session_filter(&mut self) -> bool { // \note deliver filter is in session filter @@ -175,20 +174,20 @@ impl Actions { .intersects(ActionData::SessionFilter | ActionData::SessionDeliver) } - /// True if the protocol filter should be applied + // True if the protocol filter should be applied #[inline] pub(crate) fn apply_proto_filter(&mut self) -> bool { 
self.data.contains(ActionData::ProtoFilter) } - /// True if the framework should probe for the app-layer protocol + // True if the framework should probe for the app-layer protocol #[inline] pub(crate) fn session_probe(&self) -> bool { self.data .intersects(ActionData::ProtoProbe | ActionData::ProtoFilter) } - /// True if the framework should parse application-layer data + // True if the framework should parse application-layer data #[inline] pub(crate) fn session_parse(&self) -> bool { self.data.intersects( @@ -196,22 +195,22 @@ impl Actions { ) && !self.session_probe() // still at probing stage } - /// True if the framework should buffer parsed sessions + // True if the framework should buffer parsed sessions #[inline] pub(crate) fn session_track(&self) -> bool { self.data.intersects(ActionData::SessionTrack) } - /// True if the framework should deliver future packets in this connection + // True if the framework should deliver future packets in this connection #[inline] pub(crate) fn packet_deliver(&self) -> bool { self.data.intersects(ActionData::PacketDeliver) } - /// After parsing a session, the framework must decide whether to continue - /// probing for sessions depending on the protocol - /// If no further parsing is required (e.g., TLS Handshake), this method - /// should be invoked. + // After parsing a session, the framework must decide whether to continue + // probing for sessions depending on the protocol + // If no further parsing is required (e.g., TLS Handshake), this method + // should be invoked. 
#[inline] pub(crate) fn session_clear_parse(&mut self) { self.clear_mask( @@ -222,8 +221,8 @@ impl Actions { ); } - /// Subscription requires protocol probe/parse but matched at packet stage - /// Update action to reflect state transition to protocol parsing + // Subscription requires protocol probe/parse but matched at packet stage + // Update action to reflect state transition to protocol parsing #[inline] pub(crate) fn session_done_probe(&mut self) { if self.terminal_actions.contains(ActionData::ProtoProbe) { @@ -235,8 +234,8 @@ impl Actions { } } - /// Some app-layer protocols revert to probing after session is parsed - /// This is done if more sessions are expected + // Some app-layer protocols revert to probing after session is parsed + // This is done if more sessions are expected pub(crate) fn session_set_probe(&mut self) { // If protocol probing was set at the PacketFilter stage (i.e., // terminal match for a subscription that requires parsing sessions), @@ -263,20 +262,20 @@ impl Actions { */ } - /// True if the connection should be delivered at termination + // True if the connection should be delivered at termination #[inline] pub(crate) fn connection_matched(&self) -> bool { self.terminal_actions.intersects(ActionData::ConnDeliver) } - /// Clear all actions + // Clear all actions #[inline] pub(crate) fn clear(&mut self) { self.terminal_actions = ActionData::none(); self.data = ActionData::none(); } - /// Clear a subset of actions + // Clear a subset of actions #[inline] pub(crate) fn clear_mask(&mut self, mask: ActionData) { self.data &= mask.not(); diff --git a/core/src/filter/ast.rs b/core/src/filter/ast.rs index 2290b3b4..52db674d 100644 --- a/core/src/filter/ast.rs +++ b/core/src/filter/ast.rs @@ -43,6 +43,8 @@ lazy_static! { } lazy_static! { + /// Graph of possible protocol layers used to build the filter tree. + /// For example, "tls" must be preceded by "tcp". 
pub(crate) static ref NODE_BIMAP: BiMap:: = { LAYERS .node_indices() @@ -51,8 +53,8 @@ lazy_static! { }; } -/// Returns `true` if there is a path from `from` to `to` in the -/// protocol LAYERS graph. +// Returns `true` if there is a path from `from` to `to` in the +// protocol LAYERS graph. fn has_path(from: &ProtocolName, to: &ProtocolName) -> bool { // Returns `false` if from == to let from_node = NODE_BIMAP.get_by_right(from); @@ -72,9 +74,11 @@ fn has_path(from: &ProtocolName, to: &ProtocolName) -> bool { /// An individual filter predicate #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Predicate { + /// Matches on a protocol Unary { protocol: ProtocolName, }, + /// Matches on a field in a protocol Binary { protocol: ProtocolName, field: FieldName, @@ -84,7 +88,7 @@ pub enum Predicate { } impl Predicate { - /// Returns the name of the protocol. + // Returns the name of the protocol. pub fn get_protocol(&self) -> &ProtocolName { match self { Predicate::Unary { protocol } => protocol, @@ -92,24 +96,24 @@ impl Predicate { } } - /// Returns `true` if predicate is a unary constraint. + // Returns `true` if predicate is a unary constraint. pub fn is_unary(&self) -> bool { matches!(self, Predicate::Unary { .. }) } - /// Returns `true` if predicate is a binary constraint. + // Returns `true` if predicate is a binary constraint. pub fn is_binary(&self) -> bool { matches!(self, Predicate::Binary { .. }) } - /// Returns `true` if predicate can be pushed to a packet filter. - /// i.e., the lowest filter level needed to apply the predicate is a packet filter. + // Returns `true` if predicate can be pushed to a packet filter. + // i.e., the lowest filter level needed to apply the predicate is a packet filter. 
pub fn on_packet(&self) -> bool { !self.needs_conntrack() } - /// Returns `true` if predicate *requires* raw packets - /// (i.e., cannot be connection-level data) + // Returns `true` if predicate *requires* raw packets + // (i.e., cannot be connection-level data) pub fn req_packet(&self) -> bool { if !self.on_packet() { return false; @@ -127,20 +131,20 @@ impl Predicate { !ConnData::supported_protocols().contains(&self.get_protocol().name()) } - /// Returns `true` if predicate can be satisfied by a connection filter. - /// i.e., the lowest filter level needed to apply the predicate is a connection filter. + // Returns `true` if predicate can be satisfied by a connection filter. + // i.e., the lowest filter level needed to apply the predicate is a connection filter. pub fn on_proto(&self) -> bool { self.needs_conntrack() && self.is_unary() } - /// Returns `true` if predicate can be satisfied by a session filter. - /// i.e., the lowest filter level needed to apply the predicate is a session filter. + // Returns `true` if predicate can be satisfied by a session filter. + // i.e., the lowest filter level needed to apply the predicate is a session filter. pub fn on_session(&self) -> bool { self.needs_conntrack() && self.is_binary() } - /// Returns `true` if the predicate's protocol requires connection tracking - /// i.e., is an application-layer protocol that runs on top of TCP or UDP. + // Returns `true` if the predicate's protocol requires connection tracking + // i.e., is an application-layer protocol that runs on top of TCP or UDP. fn needs_conntrack(&self) -> bool { has_path(self.get_protocol(), &protocol!("tcp")) || has_path(self.get_protocol(), &protocol!("udp")) @@ -156,6 +160,8 @@ impl Predicate { } } + // Returns `true` if the predicate would have been checked at the previous + // filter layer based on both the filter layer and the subscription level. 
pub(super) fn is_prev_layer( &self, filter_layer: FilterLayer, @@ -187,7 +193,7 @@ impl Predicate { } } - // Predicate would have been checked at prev. layer + // Returns true if the predicate would have been checked at prev. layer // Does not consider subscription type; meant to be used for filter collapse. pub(super) fn is_prev_layer_pred(&self, filter_layer: FilterLayer) -> bool { match filter_layer { @@ -204,13 +210,13 @@ impl Predicate { } } - /// Returns `true` if predicate can be pushed down to hardware port. + // Returns `true` if predicate can be pushed down to hardware port. pub(super) fn is_hardware_filterable(&self, port: &Port) -> bool { hardware::device_supported(self, port) } - /// Returns `true` if `self` and `pred` are entirely mutually exclusive - /// (i.e., could be correctly represented by "if `a` {} else if `b` {}"...) + // Returns `true` if `self` and `pred` are entirely mutually exclusive + // (i.e., could be correctly represented by "if `a` {} else if `b` {}"...) pub(super) fn is_excl(&self, pred: &Predicate) -> bool { // Unary predicates at the same layer are mutually exclusive // E.g.: `ipv4 | ipv6`, `tcp | udp` @@ -301,7 +307,7 @@ impl Predicate { false } - /// Returns `true` if `self` is a subset of `pred` (`pred` is parent of) + // Returns `true` if `self` is a subset of `pred` (`pred` is parent of) pub(super) fn is_child(&self, pred: &Predicate) -> bool { if self.get_protocol() != pred.get_protocol() { return false; diff --git a/core/src/filter/datatypes.rs b/core/src/filter/datatypes.rs index e9a98d3e..4fd914c1 100644 --- a/core/src/filter/datatypes.rs +++ b/core/src/filter/datatypes.rs @@ -25,7 +25,6 @@ pub enum Level { Static, } -#[doc(hidden)] /// Specification for one complete subscription /// A subscription is defined as a filter, callback, and one or more datatypes /// This is public to be accessible by the filtergen crate. 
@@ -127,6 +126,8 @@ impl DataType { } } + /// Creates a typical datatype for a packet list + /// (Connection-level, requires updates in order to track packets) pub fn new_default_pktlist(as_str: &'static str, needs_reassembly: bool) -> Self { DataType { level: Level::Connection, @@ -430,7 +431,7 @@ impl SubscriptionSpec { ); } - /// Add a new datatype to the subscription + // Add a new datatype to the subscription pub fn add_datatype(&mut self, datatype: DataType) { self.update_level(&datatype.level); self.datatypes.push(datatype); @@ -465,7 +466,7 @@ impl SubscriptionSpec { spec } - /// Format subscription as "callback(datatypes)" + // Format subscription as "callback(datatypes)" pub fn as_str(&self) -> String { let datatype_str: Vec<&'static str> = self.datatypes.iter().map(|d| d.as_str).collect(); format!("{}({})", self.callback, datatype_str.join(", ")).to_string() diff --git a/core/src/filter/hardware/flow_action.rs b/core/src/filter/hardware/flow_action.rs index a2dcd8ea..3367a24c 100644 --- a/core/src/filter/hardware/flow_action.rs +++ b/core/src/filter/hardware/flow_action.rs @@ -5,7 +5,7 @@ use std::mem; pub(super) type ActionRules = Vec; -/// Builds a vector of rte_flow_action +// Builds a vector of rte_flow_action // #[derive(Debug, Clone)] pub(super) struct FlowAction { pub(super) rules: ActionRules, diff --git a/core/src/filter/hardware/mod.rs b/core/src/filter/hardware/mod.rs index 9d2e87dd..f82f9425 100644 --- a/core/src/filter/hardware/mod.rs +++ b/core/src/filter/hardware/mod.rs @@ -33,8 +33,8 @@ pub(crate) struct HardwareFilter<'a> { } impl<'a> HardwareFilter<'a> { - /// Creates a new HardwareFilter for port given a filter. - /// Prunes all predicates not supported by the device. + // Creates a new HardwareFilter for port given a filter. + // Prunes all predicates not supported by the device. 
pub(crate) fn new(filter: &Filter, port: &'a Port) -> Self { let hw_patterns = filter .get_patterns_flat() @@ -72,7 +72,7 @@ impl<'a> HardwareFilter<'a> { } } - /// Installs the hardware filter to the port. + // Installs the hardware filter to the port. pub(crate) fn install(&self) -> Result<()> { debug!("{}", self); if self.patterns.iter().all(|p| p.is_empty()) { @@ -452,7 +452,7 @@ fn drop_eth_traffic(port: &Port, group: u32, priority: u32) -> Result<()> { Ok(()) } -/// Flush all flow rules associated with port +// Flush all flow rules associated with port pub(crate) fn flush_rules(port: &Port) { info!("Flushing flow rules on Port {}", port.id); unsafe { diff --git a/core/src/filter/mod.rs b/core/src/filter/mod.rs index b21b3282..ee840f21 100644 --- a/core/src/filter/mod.rs +++ b/core/src/filter/mod.rs @@ -1,14 +1,25 @@ +//! Utilities for compile-time filter generation and subscription handling. +//! +//! This module's exports will be most relevant for those adding new filter utilities +//! and/or datatypes. Nothing in this module is needed for writing an ordinary +//! Retina application. +//! + pub mod actions; pub use actions::{ActionData, Actions}; +#[doc(hidden)] #[macro_use] pub mod macros; +#[doc(hidden)] pub mod ast; mod hardware; #[allow(clippy::upper_case_acronyms)] mod parser; mod pattern; +#[doc(hidden)] pub mod ptree; +#[doc(hidden)] pub mod ptree_flat; pub mod datatypes; @@ -29,20 +40,32 @@ use std::fmt; use anyhow::{bail, Result}; use thiserror::Error; -/// Filter types +// Filter functions +// Note: Rust won't enforce trait bounds on type alias, but T must implement Tracked. + +/// Software filter applied to each packet. Will drop, deliver, and/or +/// forward packets to the connection manager. If hardware assist is enabled, +/// the framework will additionally attempt to install the filter in the NICs. pub type PacketContFn = fn(&Mbuf, &CoreId) -> Actions; +/// Filter applied to the first packet of a connection to initialize actions. 
pub type PacketFilterFn = fn(&Mbuf, &T) -> Actions; +/// Filter applied when the application-layer protocol is identified. +/// This may drop connections or update actions. +/// It may also drain buffered packets to packet-level subscriptions that match +/// at the protocol stage. pub type ProtoFilterFn = fn(&ConnData, &T) -> Actions; - -// Will apply session filter and potentially deliver or store session +/// Filter applied when the application-layer session is parsed. +/// This may drop connections, drop sessions, or update actions. +/// It may also deliver session-level subscriptions. pub type SessionFilterFn = fn(&Session, &ConnData, &T) -> Actions; - -// Subscription deliver functions -// \note Rust won't enforce trait bounds on type alias, -// but T should implement Tracked. +/// Filter applied to disambiguate and deliver matched packet-level subscriptions +/// that required stateful filtering (i.e., could not be delivered at the packet stage). pub type PacketDeliverFn = fn(&Mbuf, &ConnData, &T); +/// Filter applied to disambiguate and deliver matched connection-level subscriptions +/// (those delivered at connection termination). 
pub type ConnDeliverFn = fn(&ConnData, &T); +#[doc(hidden)] pub struct FilterFactory where T: Trackable, @@ -115,12 +138,12 @@ impl Filter { }) } - /// Returns disjunct of layered patterns + // Returns disjunct of layered patterns pub fn get_patterns_layered(&self) -> Vec { self.patterns.clone() } - /// Returns disjuct of flat patterns + // Returns disjunct of flat patterns pub fn get_patterns_flat(&self) -> Vec { self.patterns .iter() @@ -128,12 +151,12 @@ impl Filter { .collect::>() } - /// Returns predicate tree + // Returns predicate tree pub fn to_ptree(&self) -> FlatPTree { FlatPTree::new(&self.get_patterns_flat()) } - /// Returns `true` if filter can be completely realized in hardware + // Returns `true` if filter can be completely realized in hardware pub fn is_hardware_filterable(&self) -> bool { // needs to take port as argument todo!(); diff --git a/core/src/filter/pattern.rs b/core/src/filter/pattern.rs index c6b31bca..7904f94f 100644 --- a/core/src/filter/pattern.rs +++ b/core/src/filter/pattern.rs @@ -21,12 +21,12 @@ pub struct FlatPattern { } impl FlatPattern { - /// Returns true if pattern is empty + // Returns true if pattern is empty pub(super) fn is_empty(&self) -> bool { self.predicates.is_empty() } - /// Returns true if self is a fully qualified FlatPattern + // Returns true if self is a fully qualified FlatPattern pub(super) fn is_fully_qualified(&self) -> bool { let (layers, labels) = (&*LAYERS, &*NODE_BIMAP); @@ -49,8 +49,8 @@ impl FlatPattern { ret } - /// Returns true if a pattern should be skipped at a given filter layer - /// Example: we don't need to check the pattern "ipv4 and tcp" at the session filter layer. + // Returns true if a pattern should be skipped at a given filter layer + // Example: we don't need to check the pattern "ipv4 and tcp" at the session filter layer. 
pub(super) fn is_prev_layer( &self, filter_layer: FilterLayer, @@ -61,7 +61,7 @@ impl FlatPattern { .all(|p| p.is_prev_layer(filter_layer, subscription_level)) } - /// Returns a vector of fully qualified patterns from self + // Returns a vector of fully qualified patterns from self pub(super) fn to_fully_qualified(&self) -> Result> { if self.is_empty() { return Ok(Vec::new()); @@ -130,7 +130,7 @@ impl FlatPattern { Ok(fq_patterns) } - /// Returns FlatPattern of only predicates that can be filtered in hardware + // Returns FlatPattern of only predicates that can be filtered in hardware pub(super) fn retain_hardware_predicates(&self, port: &Port) -> FlatPattern { FlatPattern { predicates: self @@ -160,7 +160,7 @@ impl fmt::Display for FlatPattern { } } -/// Represents a fully qualified pattern, ordered by header layer +// Represents a fully qualified pattern, ordered by header layer #[derive(Debug, Clone)] pub struct LayeredPattern(LinkedHashMap>); @@ -173,7 +173,7 @@ impl LayeredPattern { self.0.is_empty() } - /// Adds predicates on protocol header. Returns true on success + // Adds predicates on protocol header. Returns true on success fn add_protocol(&mut self, proto_name: ProtocolName, field_predicates: Vec) -> bool { let (layers, labels) = (&*LAYERS, &*NODE_BIMAP); diff --git a/core/src/filter/ptree.rs b/core/src/filter/ptree.rs index 6b461985..91ff5d7d 100644 --- a/core/src/filter/ptree.rs +++ b/core/src/filter/ptree.rs @@ -9,17 +9,17 @@ use std::fmt; #[derive(Debug, Clone, Copy)] pub enum FilterLayer { - /// Quick-pass filter per-packet + // Quick-pass filter per-packet PacketContinue, - /// Packet delivery | packet filter + // Packet delivery | packet filter Packet, - /// Connection (protocol) filter + // Connection (protocol) filter Protocol, - /// Session delivery | session filter + // Session delivery | session filter Session, - /// Connection delivery (conn. termination) + // Connection delivery (conn. 
termination) ConnectionDeliver, - /// Packet delivery (packet datatype match at later layer) + // Packet delivery (packet datatype match at later layer) PacketDeliver, } @@ -36,9 +36,9 @@ impl fmt::Display for FilterLayer { } } -/// Represents a subscription (callback, datatype) -/// that will be delivered at a given filter node. -/// Used in compile-time filter generation. +// Represents a subscription (callback, datatype) +// that will be delivered at a given filter node. +// Used in compile-time filter generation. #[derive(Hash, Debug, Clone, PartialEq, Eq)] pub struct Deliver { // Subscription ID as given by filtergen module @@ -49,30 +49,30 @@ pub struct Deliver { pub must_deliver: bool, } -/// A node representing a predicate in the tree +// A node representing a predicate in the tree #[derive(Debug, Clone)] pub struct PNode { - /// ID of node + // ID of node pub id: usize, - /// Predicate represented by this PNode + // Predicate represented by this PNode pub pred: Predicate, - /// Actions to apply at this node - /// [for action filters] + // Actions to apply at this node + // [for action filters] pub actions: Actions, - /// Subscriptions to deliver, by index, at this node - /// Empty for non-delivery filters. + // Subscriptions to deliver, by index, at this node + // Empty for non-delivery filters. pub deliver: HashSet, - /// The patterns for which the predicate is a part of + // The patterns for which the predicate is a part of pub patterns: Vec, - /// Child PNodes + // Child PNodes pub children: Vec, - /// Mutually exclusive with the node preceding it in child list + // Mutually exclusive with the node preceding it in child list pub if_else: bool, } @@ -267,25 +267,25 @@ impl fmt::Display for PNode { } } -/// A n-ary tree representing a Filter. -/// Paths from root to leaf represent a pattern for data to match. -/// Filter returns action(s) or delivers data. +// A n-ary tree representing a Filter. 
+// Paths from root to leaf represent a pattern for data to match. +// Filter returns action(s) or delivers data. #[derive(Debug, Clone)] pub struct PTree { - /// Root node + // Root node pub root: PNode, - /// Number of nodes in tree + // Number of nodes in tree pub size: usize, - /// Possible actions + // Possible actions pub actions: Actions, - /// Which filter this PTree represents + // Which filter this PTree represents pub filter_layer: FilterLayer, - /// Has `collapse` been applied? - /// Use to ensure no filters are applied after `collapse` + // Has `collapse` been applied? + // Use to ensure no filters are applied after `collapse` collapsed: bool, } @@ -303,9 +303,9 @@ impl PTree { } } - /// Add a filter to an existing PTree - /// Applied for multiple subscriptions, when multiple actions - /// and/or delivery filters will be checked at the same stage + // Add a filter to an existing PTree + // Applied for multiple subscriptions, when multiple actions + // and/or delivery filters will be checked at the same stage pub fn add_filter( &mut self, patterns: &[FlatPattern], @@ -367,9 +367,9 @@ impl PTree { } } - /// Add a single pattern (root-to-leaf path) to the tree. - /// Add nodes that don't exist. Update actions or subscription IDs - /// for terminal nodes at this stage. + // Add a single pattern (root-to-leaf path) to the tree. + // Add nodes that don't exist. Update actions or subscription IDs + // for terminal nodes at this stage. fn add_pattern( &mut self, pattern: &FlatPattern, @@ -441,7 +441,7 @@ impl PTree { } } - /// Returns a copy of the subtree rooted at Node `id` + // Returns a copy of the subtree rooted at Node `id` pub fn get_subtree(&self, id: usize) -> Option { fn get_subtree(id: usize, node: &PNode) -> Option { if node.id == id { @@ -702,8 +702,8 @@ impl PTree { prune_redundant_branches(&mut self.root, self.filter_layer, can_prune_next); } - /// Apply all filter tree optimizations. 
- /// This must only be invoked AFTER the tree is completely built. + // Apply all filter tree optimizations. + // This must only be invoked AFTER the tree is completely built. pub fn collapse(&mut self) { if matches!( self.filter_layer, @@ -767,7 +767,7 @@ impl PTree { layered } - /// modified from https://vallentin.dev/2019/05/14/pretty-print-tree + // modified from https://vallentin.dev/2019/05/14/pretty-print-tree fn pprint(&self) -> String { fn pprint(s: &mut String, node: &PNode, prefix: String, last: bool) { let prefix_current = if last { "`- " } else { "|- " }; diff --git a/core/src/filter/ptree_flat.rs b/core/src/filter/ptree_flat.rs index 368b6496..a3e0461b 100644 --- a/core/src/filter/ptree_flat.rs +++ b/core/src/filter/ptree_flat.rs @@ -3,7 +3,7 @@ use super::pattern::{FlatPattern, LayeredPattern}; use std::fmt; -/// Represents the sub-filter that a predicate node terminates. +// Represents the sub-filter that a predicate node terminates. #[derive(Debug, Clone)] pub enum Terminate { Packet, @@ -23,25 +23,25 @@ impl fmt::Display for Terminate { } } -/// A node representing a predicate in the tree +// A node representing a predicate in the tree #[derive(Debug, Clone)] pub struct FlatPNode { - /// ID of node + // ID of node pub id: usize, - /// Predicate represented by this FlatPNode + // Predicate represented by this FlatPNode pub pred: Predicate, - /// Whether the node terminates a pattern + // Whether the node terminates a pattern pub is_terminal: bool, - /// Sub-filter terminal (packet, connection, or session) + // Sub-filter terminal (packet, connection, or session) pub terminates: Terminate, - /// The patterns for which the predicate is a part of + // The patterns for which the predicate is a part of pub patterns: Vec, - /// Child FlatPNodes + // Child FlatPNodes pub children: Vec, } @@ -73,19 +73,21 @@ impl fmt::Display for FlatPNode { } } -/// A n-ary tree representing a Filter. -/// Paths from root to leaf represent a pattern for a frame to match. 
+// A n-ary tree representing a boolean (drop/keep) filter. +// This is used only for installing hardware filters and +// for validating filter syntax. +// Paths from root to leaf represent a pattern for a frame to match. #[derive(Debug)] pub struct FlatPTree { - /// Root node + // Root node pub root: FlatPNode, - /// Number of nodes in tree + // Number of nodes in tree pub size: usize, } impl FlatPTree { - /// Creates a new predicate tree from a slice of FlatPatterns + // Creates a new predicate tree from a slice of FlatPatterns pub fn new(patterns: &[FlatPattern]) -> Self { let root = FlatPNode { id: 0, @@ -202,7 +204,7 @@ impl FlatPTree { } } - /// Returns a copy of the subtree rooted at Node `id` + // Returns a copy of the subtree rooted at Node `id` pub fn get_subtree(&self, id: usize) -> Option { fn get_subtree(id: usize, node: &FlatPNode) -> Option { if node.id == id { @@ -218,8 +220,8 @@ impl FlatPTree { get_subtree(id, &self.root) } - /// Returns list of subtrees rooted at packet terminal nodes. - /// Used to generate connection filter. + // Returns list of subtrees rooted at packet terminal nodes. + // Used to generate connection filter. pub fn get_connection_subtrees(&self) -> Vec { fn get_connection_subtrees(node: &FlatPNode, list: &mut Vec) { if matches!(node.terminates, Terminate::Packet) { @@ -234,8 +236,8 @@ impl FlatPTree { list } - /// Returns list of subtrees rooted at connection terminal nodes. - /// Used to generate session filter. + // Returns list of subtrees rooted at connection terminal nodes. + // Used to generate session filter. pub fn get_session_subtrees(&self) -> Vec { fn get_session_subtrees(node: &FlatPNode, list: &mut Vec) { if matches!(node.terminates, Terminate::Connection) { @@ -250,8 +252,8 @@ impl FlatPTree { list } - /// Removes some patterns that are covered by others, but not all. - /// (e.g. 
"ipv4 or ipv4.src_addr = 1.2.3.4" will remove "ipv4.src_addr = 1.2.3.4") + // Removes some patterns that are covered by others, but not all. + // (e.g. "ipv4 or ipv4.src_addr = 1.2.3.4" will remove "ipv4.src_addr = 1.2.3.4") pub fn prune_branches(&mut self) { fn prune(node: &mut FlatPNode) { if node.is_terminal { @@ -264,7 +266,7 @@ impl FlatPTree { prune(&mut self.root); } - /// modified from https://vallentin.dev/2019/05/14/pretty-print-tree + // modified from https://vallentin.dev/2019/05/14/pretty-print-tree fn pprint(&self) -> String { fn pprint(s: &mut String, node: &FlatPNode, prefix: String, last: bool) { let prefix_current = if last { "`- " } else { "|- " }; diff --git a/core/src/lib.rs b/core/src/lib.rs index f5e9e6b6..5b97724c 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -65,8 +65,8 @@ //! ``` //! //! For programs that require many filters (e.g., searching for 100s of attack signatures), using -//! the [subscription](retina_filtergen::subscription) macro to specify an input TOML file may -//! be preferable to specifying each subscription individually as above. +//! the [subscription](../retina_filtergen/attr.subscription.html) macro to specify an input TOML +//! file may be preferable to specifying each subscription individually as above. //! #[macro_use] @@ -76,9 +76,6 @@ pub mod conntrack; #[doc(hidden)] #[allow(clippy::all)] mod dpdk; -// The filter module must be public to be accessible by the filter_gen procedural macro crate. -// However, module functions should be opaque to users, so documentation is hidden by default. -#[doc(hidden)] pub mod filter; pub mod lcore; pub mod memory; diff --git a/datatypes/src/lib.rs b/datatypes/src/lib.rs index c0d49882..2acb05d4 100644 --- a/datatypes/src/lib.rs +++ b/datatypes/src/lib.rs @@ -7,9 +7,9 @@ //! //! Each subscribable datatype must: //! -//! - Be defined as a [retina_core::filter::DataType], with appropriate parameters and [retina_core::filter::Level]. +//! 
- Be defined as a [DataType](retina_core::filter::DataType), with appropriate parameters and [retina_core::filter::Level]. //! - Implement one of the traits defined in this module (Tracked, FromSession, etc.) -//! - Be added to the [crate::typedefs::DATATYPES] map +//! - Be added to the [DATATYPES](`crate::typedefs::DATATYPES`) map (note: we are actively working on an approach that eliminates this requirement). //! //! diff --git a/datatypes/src/static_type.rs b/datatypes/src/static_type.rs index 21877aaa..be2435ce 100644 --- a/datatypes/src/static_type.rs +++ b/datatypes/src/static_type.rs @@ -1,6 +1,7 @@ //! Static-level datatypes. //! A data type is considered "static" if it can be inferred at or before //! the first packet in a connection and it stays constant throughout a connection. +//! See datatypes, including foreign types, that implement [StaticData](trait.StaticData.html). use super::StaticData; use pnet::datalink::MacAddr; diff --git a/datatypes/src/typedefs.rs b/datatypes/src/typedefs.rs index a07e2ffb..bc8dbbc6 100644 --- a/datatypes/src/typedefs.rs +++ b/datatypes/src/typedefs.rs @@ -96,8 +96,7 @@ lazy_static! { /// Core ID is a special case, as it cannot be derived from connection, /// session, or packet data. It is simpler to define it as a directly tracked datatype. /// - /// The directly tracked datatypes are: PacketList, SessionList, and CoreId - #[doc(hidden)] + /// The directly tracked datatypes are SessionList and CoreId pub static ref DIRECTLY_TRACKED: HashMap<&'static str, &'static str> = HashMap::from([ ("SessionList", "sessions"), ("CoreId", "core_id") diff --git a/docs/DEVELOPER.md b/docs/DEVELOPER.md new file mode 100644 index 00000000..6f599e66 --- /dev/null +++ b/docs/DEVELOPER.md @@ -0,0 +1,203 @@ +# Retina (v1.0.0) Detailed Developer Documentation + +This document is intended as a resource for Retina developers and contributors. 
It is meant to supplement the core [developer documentation](https://stanford-esrg.github.io/retina/retina_core/) and the [original paper](https://zakird.com/papers/retina.pdf). It contains a more detailed and centralized system design description than the documentation. Compared to the original paper, it both reflects more recent changes and inlines links to relevant code. + +Retina is in its early stages, and we are actively working on new releases and research that will significantly expand its utility. We welcome contributors and open issues, and we would love to hear about your use-cases (especially if Retina doesn't suit your needs!). + +## Subscribing to Traffic + +A Retina application consists of one or more "subscriptions", each of which includes datatype(s), a callback, and a filter. For each subscription, the *callback* will be invoked for all network traffic that matches the *filter*. This traffic will be delivered to the requested callback in the form of assembled *datatype(s)* (e.g., a parsed TLS handshake). As [documented](https://stanford-esrg.github.io/retina/retina_filtergen/index.html), subscriptions can be defined in a `toml` or directly in a `.rs` file. + +All subscriptions are applied to (five-tuple) connections. It is currently impossible to filter or request datatypes that cross connection boundaries (for example, multiple flows initiated by the same host). For applications that require this, we recommend multiple callbacks with a shared data structure (e.g., indexed by port, source IP, etc). + +Each callback is invoked at a pre-defined "stage" in the connection, which is determined by (1) when *all* requested datatypes can be fully assembled and (2) when the filter can be definitively matched (or not). These stages are (1) on connection termination, (2) per-packet (after filter match), or (3) when an L7 protocol session (e.g., TLS handshake) is fully parsed. We expect to support streaming data by early 2025. 
+ +## Notes on Writing a Retina Application + +We encourage and welcome contributions to the [examples](https://github.com/stanford-esrg/retina/tree/main/examples) directory! + +Available datatypes can be found in Retina's [datatypes crate](https://github.com/stanford-esrg/retina/tree/main/datatypes). The filter language is documented in the [filtergen crate](https://github.com/stanford-esrg/retina/blob/main/filtergen/src/lib.rs). + +In Retina v0.1.0, callbacks could be defined as closures. In Retina v1, they cannot; this means that any state must be defined statically (e.g., OnceCell, lazy_static) with appropriate synchronization. The CoreId (static) datatype can be used to create zero-lock data structures, as in many of the [examples](https://github.com/stanford-esrg/retina/tree/main/examples). + +In 2025, we intend to extend Retina's filter expressiveness significantly, add support for streaming datatypes (e.g., "invoke callback every Nth packet"), support callback-like filters (e.g., to apply a custom heuristic), and introduce utilities that make writing callbacks easier (e.g., cross-core message passing). + +### Performance Considerations + +Some subscriptions are inherently more expensive than others. In general, we find that the performance of a Retina application depends most on: +- **Callback complexity**: If the application spends too long (CPU cycles) in a callback, DPDK's [Rx queue](https://doc.dpdk.org/guides/prog_guide/ring_lib.html) may fill up and begin to drop packets. +- **Filter breadth**: Retina's multi-stage filter pipeline eagerly discards out-of-scope traffic to reduce computational burden at subsequent processing steps. If a filter requires processing a large portion of traffic, Retina's filtering infrastructure does not provide performance benefit. +- **Datatype complexity**: some datatypes are more expensive to process than others. 
For example, we have found that packet list datatypes, which cache raw packets, may exhaust DPDK's mempool resources. Similarly, datatypes that require parsing and reassembly (which can be computationally expensive) throughout connections may encounter CPU bottlenecks. +- **Network conditions**: Very lossy networks will require expensive TCP reassembly (see "stream reassembly"), which can create both CPU and mempool bottlenecks. + +### Datatypes + +Subscriptions cannot be delivered until (1) a filter has definitively matched and (2) all datatypes are fully assembled. The latter is dictated by the datatype's `Level` ([source](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/datatypes.rs)). + +* A *Connection-Level* datatype can only be fully assembled by the end of the connection. Examples include connection duration, a list of inter-packet arrival times, and TCP flag history. Any subscription that includes a connection-level datatype will be delivered when the connection terminates. +* A *Session-Level* datatype is fully assembled -- and can be delivered -- when the L7 session (TLS handshake, HTTP transaction, etc.) is fully parsed. However, if a session-level datatype is requested alongside a connection-level datatype, the session will be cached until the connection terminates. +* A *Packet-Level* datatype (e.g., raw frame, per-packet payload) can be delivered as soon as a corresponding filter matches. A raw packet cannot be requested in the same subscription as a higher-level datatype, however Retina does provide packet lists. +* A *Static-Level* datatype is constant throughout a connection and inferrable at first packet (e.g., five-tuple, Core ID). It can be delivered when its filter matches and other datatypes in its subscription can be delivered. +* We expect to support more flexible and streaming data delivery stages by early 2025. For example, we aim to support delivery at "the first N packets", "every Nth packet", "every 10 seconds", etc. 
+ +All defined datatypes specify the operations they require (e.g., parsing, pre-reassembly update) via the Datatype struct ([source](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/datatypes.rs)) and implement pre-defined methods that can be invoked by the framework ([source](https://github.com/stanford-esrg/retina/blob/main/datatypes/src/lib.rs)). + +For example, many "connection feature" datatypes (e.g., packet count, TCP flag history) require an `update` operation when a new packet is received. Most "session" datatypes require parsing and provide a `from_session` API. + +#### Filtering for Raw Packets + +For any datatype, it is currently impossible to filter for both (1) fields that can *only* be checked in individual packets and (2) fields that can *only* be checked at a parsing layer. For example, you cannot filter for "tls packets that are larger than N bytes" or "SYN packets in an HTTP connection with a specific user-agent" ([issue](https://github.com/stanford-esrg/retina/issues/66)). + +To achieve similar behavior, users can define new datatypes and/or subscribe to packet lists. For example, to subscribe to "tls packets that are larger than N bytes", one could define a connection-level datatype that caches packets larger than N bytes and subscribe to this datatype with a "tls" filter. + +#### Subscribing to Raw Packets + +Packet-level subscriptions can only support one datatype (alongside, optionally, the Core ID on which the packet is processed). For those who wish to subscribe to raw packets *and* higher-level data, or who wish to receive multiple packets in one callback invocation, see the PacketList [datatypes](https://github.com/stanford-esrg/retina/tree/main/datatypes/src). + +When subscribing to a Packet-Level datatype with a filter that matches at a connection or session level, the callback will be invoked for all packets in the connection. 
For example, requesting raw packets with a "tls.sni ~ abc" filter will deliver all packets in all connections for which the TLS SNI matches "abc", including ciphertext. + +#### Subscribing to Static Datatypes + +A subscription for a static datatype only (e.g., FiveTuple) will be delivered once per connection and as soon as the relevant filter has matched. + +## Interpreting Runtime Output + +Depending on [configuration](https://github.com/stanford-esrg/retina/blob/main/core/src/config.rs), Retina can print and/or log statistics [queried via DPDK](https://github.com/stanford-esrg/retina/blob/main/core/src/lcore/monitor.rs). + +Mempool availability indicates the percentage of each DPDK mempool (not RAM) consumed by the application. In online mode, Retina initializes one mempool per socket. This may be high if raw `Mbufs` are retained in memory for too long (likely for a PacketList datatype or reassembly). + +The typical NIC statistics are as follows: +- Ingress: packets/bytes that hit the NIC (`rx_phy_packets`/`rx_phy_bytes` in DPDK) +- Good: packets/bytes after hardware filtering and CRC checking (`rx_good_packets` / `rx_good_bytes` in DPDK) +- Process: packets/bytes received by lcores running the RX loop *after flow sampling* (sink core) if applicable (`rx_good_* - sink_*`) +- HW dropped: packets reported dropped by the NIC due to lack of physical buffers (`rx_phy_discard_packets` in DPDK) +- SW dropped: packets received on the NIC but not processed by software, typically due to lack of mbufs or space in an RX queue (`rx_missed_errors` in DPDK). + +## Interpreting Compile-Time Output + +An important part of the Retina pipeline occurs at *compile-time*. This code is defined in the [filtergen](https://github.com/stanford-esrg/retina/tree/main/filtergen/src) crate.
At a high level, this crate generates: +- Filtering logic: all filters for all subscriptions are decomposed into filter "layers" (e.g., per-packet, following session parse, etc.), combined with each other, and optimized to remove redundant or unnecessary conditions. +- Datatype wrapper: all datatypes for all subscriptions are deduplicated and combined into a Tracked datatype, which implements the [Trackable](https://github.com/stanford-esrg/retina/blob/main/core/src/subscription/mod.rs) trait. +- Callback invocation: callbacks are invoked within the generated filters. + +During compilation, Retina prints out the generated filters (including callback invocations) and datatypes, as well as the application-layer protocol parsers required by the application. + +### Filters + +Filters are represented as a trie. Not all filter layers will be applied for all applications; for example, an application that does not require L7 protocol parsing will not apply the "Session" filter. + +The following filter layers may be present: +- *Pkt (pass)*: applied to each packet, possibly in hardware, which will either drop packets, "pass" them to the connection handler, and/or deliver them. + - If hardware support is true, a "drop/keep" version of this filter will also be installed on the NICs. +- *Pkt*: applied to the first packet in each connection. This initializes Actions for each connection. +- *Proto*: applied when the L7 protocol is identified. This will either drop the connection or pass it to a subsequent processing stage. +- *Session*: applied when the L7 protocol session is parsed. This will either drop the session (and potentially the connection), deliver the session, and/or pass the connection to a subsequent processing stage. +- *Connection (Deliver)*: applied when the connection terminates. This will deliver any Connection-level subscriptions.
+- *Pkt (Deliver)*: applied per-packet if (1) a connection requires all packets to be delivered and (2) they cannot be delivered in the Packet filter. This is relevant when a subscription requests a stateful/L7 filter (e.g., "tls") with a packet-level datatype. +- We expect to support streaming filters (e.g., "apply every N packets") by early 2025. + +The filter trees will include callback invocations and/or actions. + +Each "action" indicates to the runtime *what operation(s) it should apply next* to a given packet, connection, or session. For example, a subscription for "dns" traffic and the "DnsTransaction" datatype will require -- if it has matched -- session parsing after the "protocol identified" filter. "Terminal actions" are retained for the duration of the connection. Actions are described in more detail below. + +Some optimizations are applied to the filters at compile-time. For example, the framework will attempt to avoid reapplying conditions that are guaranteed to be true based on the results of previous filters. An `x` in the filter output indicates mutual exclusion and corresponds to an `else if` condition in code. + +## Sink Cores and Flow Sampling + +Users can configure flow sampling by configuring a [sink core](https://stanford-esrg.github.io/retina/retina_core/config/struct.SinkConfig.html). RSS directs a subset of flows to the sink core, which immediately drops them. + +One sink core is required per interface. Note that sink cores currently are not compatible with hardware filtering. `hardware_assist` should be disabled when configuring a sink core ([issue](https://github.com/stanford-esrg/retina/issues/80)). + +## Retina Pipeline + +Retina compiles subscriptions into a work-conserving pipeline that (1) eagerly discards out-of-scope traffic, (2) lazily reconstructs relevant network data, and (3) efficiently shares processing work and data between subscriptions. 
+ +- Most analysis questions require fully processing only a subset of Internet traffic. A multi-stage filtering infrastructure allows Retina to iteratively discard extraneous traffic as early and often as possible, dramatically reducing unnecessary computation. +- Retina's runtime pipeline is designed to minimize wasted computation on traffic that will be discarded by later filters. The framework defers expensive operations until it is confident that the operation is needed to achieve the desired analysis result. Multi-stage filtering also allows the framework to stop expensive operations early. +- Multiple subscriptions are executed by a single runtime pipeline. At compile-time, the system leverages a global view of all subscriptions -- filters, datatypes, and callbacks -- to build and optimize a pipeline that eliminates redundant and unnecessary operations. + +The high-level architecture of Retina is similar to that described in our original paper. The core runtime components are the same. To support multiple subscriptions, the compile-time pipeline includes an analysis and optimization stage, which (1) combines and optimizes filters and (2) combines and outputs tracked data. + +### Compile-Time Processing + +Compile-time code generation is handled by the [retina-filtergen crate](https://github.com/stanford-esrg/retina/tree/main/filtergen), which invokes helpers from the core library's [filter](https://github.com/stanford-esrg/retina/tree/main/core/src/filter) and [protocol](https://github.com/stanford-esrg/retina/tree/main/core/src/protocols) modules. At a high level, the framework ingests and combines all subscriptions to create (1) the (multi-stage) filters described above and (2) a trackable "wrapper" struct representing the union of all requested datatypes and implementing the [Trackable](https://github.com/stanford-esrg/retina/blob/main/core/src/subscription/mod.rs) trait.
+ +The compile-time pipeline begins by translating each subscription into a structured specification that includes the callback, datatype(s), and filter. The retina-datatypes crate provides necessary information about each datatype: what `Level` it can be delivered at and the operations it requires. + +The framework [parses](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/parser.rs) each filter into a list of disjunct [Patterns](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/pattern.rs), each of which contains one or more [Predicates](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/ast.rs). Predicates are either explicit in the filter or inferred from protocol layers (see [`LAYERS`](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/ast.rs)). (For example, the filter "tcp.port = 80 and http" would become two Patterns: [["ethernet", "ipv4", "tcp", "tcp.port = 80", "http"], ["ethernet", "ipv6", "tcp", "tcp.port = 80", "http"]].) Retina uses [`pest`](https://pest.rs) to define the filter [grammar](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/grammar.pest). + +The framework builds a predicate trie ([PTree](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/ptree.rs)) for each filter layer (packet, session, connection delivery, etc.). Conceptually, each pattern becomes a root-to-node path which input data must match in order to satisfy a filter. As noted above, PTrees capture *the union of all subscriptions in an application*. + +Each PTree node that terminates a Pattern will contain either `Actions` or callback invocations. `Actions` are [determined by](https://github.com/stanford-esrg/retina/blob/main/core/src/filter/datatypes.rs) the `Datatype`s in a subscription and filter state. Note that each PTree stage contains only those predicates which can be definitively checked at its stage (e.g., the TLS SNI cannot be checked per-packet).
The Actions for a "matching" (partially matched) filter differ from those of a fully matched filter. + +Below is one example of the generated PTrees for a Retina application with two subscriptions. + + + +The filtergen crate uses each PTree to generate filter code. Filters return Actions. Callbacks -- including data reconstruction, if necessary -- are inlined within filters. + +Finally, the filtergen crate combines the required datatypes into one [Trackable](https://github.com/stanford-esrg/retina/blob/main/core/src/subscription/mod.rs) type, which provides methods that can be invoked by the runtime. + +**Use [`cargo expand`](https://crates.io/crates/cargo-expand) to view the generated code for your application.** + +### Runtime System + +At a high level, Retina receives raw packets from the network and builds increasingly more complex segments of data up through each filtering layer while executing callback(s) with the user's subscription data. The runtime framework consists of (1) stateless packet processing and (2) stateful connection tracking. + +#### Stateless Packet Processing + +- The NIC applies an initial filter to ingress packets. This hardware filter is installed at application startup [if configured](https://stanford-esrg.github.io/retina/retina_core/config/struct.OnlineConfig.html) and supported by the NIC. +- Receive Side Scaling (RSS) distributes packets among cores for software packet-processing. (Packets associated to the same five-tuple are forwarded to a consistent core.) +- Retina uses DPDK to deliver raw packets from the NIC directly to user-space memory. +- Packets are filtered again in software to discard those that cannot be filtered in hardware (e.g., due to unsupported header fields or operands) and to deliver subscriptions that do not require connection traffic. +- If required by the subscription data type and result of the packet filter, Retina forwards packets to the Connection Tracker. 
+ +#### Stateful Connection Processing + +Each core maintains a connection [table](https://github.com/stanford-esrg/retina/blob/main/core/src/conntrack/mod.rs) that maps connection IDs (five-tuples) to connection state (parsers, tracked data, required operations, etc.). + +New connections are inserted when the connection tracker receives (1) a TCP SYN or (2) a UDP packet that does not correspond to an existing connection. + +To avoid memory exhaustion from inactive connections, Retina implements a [timer wheel](https://github.com/stanford-esrg/retina/blob/main/core/src/conntrack/timerwheel.rs) that regularly polls for and removes [inactive](https://stanford-esrg.github.io/retina/retina_core/config/struct.ConnTrackConfig.html) connections. Retina applies both a short connection establishment timeout to expire unanswered SYNs and longer inactivity timeouts to remove established inactive connections (TCP and UDP inactivity timeouts are configured separately). + +A connection is "terminated" either when it times out or, in the case of TCP, when both sides have sent FINs and ACKs for FINs. Note that this can make UDP connection processing more expensive; in the case of connection-level datatypes, state for matched connections may need to be maintained until the connection times out. When a UDP connection is discarded, its ID, without associated state, is retained in the connection table for a timeout period to prevent spurious re-insertion. + +The per-connection logic is largely implemented in the [conntrack/conn](https://github.com/stanford-esrg/retina/tree/main/core/src/conntrack/conn) directory. Maintained state for each connection typically includes the currently-required Actions, application-specific Trackable data, application-layer parsers, TCP reassembly (if applicable), and timestamps (for expiration). 
+ +#### Multiple Subscriptions + +We can consider the runtime control flow for each subscription as (1) stateless packet-processing followed by (2) a per-connection state machine, where each "state" corresponds to required Actions (e.g., reassembly, parsing) and filter stages execute state transitions. The compile-time process is responsible for combining the subscriptions. + +At compile-time, the system leverages a global view of all subscriptions -- filters, datatypes, and callbacks -- to build each subscription-specific state machine and compose them into a single per-connection state machine. Processing work is shared between subscriptions, and per-subscription disambiguation is deferred until the delivery stage. In addition to discarding out-of-scope *traffic*, state transitions discard out-of-scope *actions* as the subscriptions requiring them fail to match. + +For example, consider two basic subscriptions that subscribe to TLS handshakes with different datatypes. + + + +The figures below illustrate the control flow for these subscriptions. Both subscriptions require TCP reassembly, L7 protocol probing, and TLS handshake parsing, as well as infrastructure such as packet processing and connection tracking. By combining the processing pipeline (second figure below), the compile-time framework minimizes repeated work. Disambiguation -- checking the TLS SNI and TCP port -- is deferred until the per-subscription state machines diverge. + + + + + +### Stream Reassembly + +Retina's TCP reassembly infrastructure re-orders raw packets as they arrive. The system caches out-of-order Mbufs into a configurable-length ring buffer which is flushed when the next expected segment arrives. + +This approach is far more computationally efficient than reconstructing byte-streams by copying payloads into a receive buffer, as in traditional approaches to reassembly. However, it introduces a tradeoff with mempool utilization.
On a lossy network, Mbufs are cached for reordering and may not be returned quickly enough to DPDK's memory pools. This can lead to high mempool utilization. We are exploring alternate approaches, such as initiating copies if observed sequence number gaps reach a certain threshold. + +### Parsing and Sessions + +Retina's application-layer (L7 protocol) parsers are defined in the [protocols/stream](https://github.com/stanford-esrg/retina/tree/main/core/src/protocols/stream) directory in the core library. Each Retina application builds a `ParserRegistry` from the union of L7 protocols required by all filters and all datatypes. The `ParserRegistry` exposes APIs that the [connection management system](https://github.com/stanford-esrg/retina/blob/main/core/src/conntrack/conn/conn_info.rs) invokes to identify and parse application-layer data. + +Protocol parsers have two primary roles: probing and parsing. +- *Probing* (protocol identification): During a *probing* stage, each registered parser ingests each packet to determine whether a given connection matches its protocol. + - When probing is complete, the `protocol_filter` can be applied. +- *Session parsing*: During a *parsing* stage, the parser identified from probing ingests each packet to assemble a `Session` (an enum containing the parsed datatype). + - When parsing is complete, the `session_filter` can be applied and session-level subscriptions can be invoked. + +Probing and parsing operations are invoked by the [connection management](https://github.com/stanford-esrg/retina/blob/main/core/src/conntrack/conn/conn_info.rs) infrastructure when required by `actions`. [`FromSession` datatypes](https://github.com/stanford-esrg/retina/blob/main/datatypes/src/lib.rs) are built from sessions returned by the framework's parsers. + +We aim to support more application-layer protocols in the future, and we welcome contributions!
+ + diff --git a/docs/figures/combined_states_example.png b/docs/figures/combined_states_example.png new file mode 100644 index 00000000..f45c5392 Binary files /dev/null and b/docs/figures/combined_states_example.png differ diff --git a/docs/figures/filtergen_example.png b/docs/figures/filtergen_example.png new file mode 100644 index 00000000..f1653cba Binary files /dev/null and b/docs/figures/filtergen_example.png differ diff --git a/docs/figures/tls_single_flow_multi.png b/docs/figures/tls_single_flow_multi.png new file mode 100644 index 00000000..a7e5d03e Binary files /dev/null and b/docs/figures/tls_single_flow_multi.png differ diff --git a/docs/figures/tls_sub_ex_code.png b/docs/figures/tls_sub_ex_code.png new file mode 100644 index 00000000..2857d3b3 Binary files /dev/null and b/docs/figures/tls_sub_ex_code.png differ diff --git a/filtergen/src/lib.rs b/filtergen/src/lib.rs index 125b4efe..54decda8 100644 --- a/filtergen/src/lib.rs +++ b/filtergen/src/lib.rs @@ -87,7 +87,7 @@ //! //! # Datatype syntax //! All subscribed datatypes -- parameters to callbacks -- must be requested by reference. -//! Supported datatypes are defined in the [retina_datatypes](../datatypes) crate. +//! Supported datatypes are defined in the [retina_datatypes](../retina_datatypes) crate. //! //! # Filter syntax //! The Retina filter syntax is similar to that of [Wireshark display @@ -395,8 +395,8 @@ pub fn filter(args: TokenStream, input: TokenStream) -> TokenStream { generate(input, config) } -// For generating a Retina program without a specification file -// This expects to receive the number of subscriptions +/// For generating a Retina program without a specification file +/// This expects to receive the number of subscriptions #[proc_macro_attribute] pub fn retina_main(args: TokenStream, input: TokenStream) -> TokenStream { let input = parse_macro_input!(input as syn::ItemFn);