-
Notifications
You must be signed in to change notification settings - Fork 45
/
homa_wire.h
485 lines (431 loc) · 17 KB
/
homa_wire.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
/* SPDX-License-Identifier: BSD-2-Clause */
/* This file defines the on-the-wire format of Homa packets. */
#ifndef _HOMA_WIRE_H
#define _HOMA_WIRE_H
#include <linux/skbuff.h>
/**
 * enum homa_packet_type - Defines the possible types of Homa packets.
 *
 * The type of a packet is carried in the @type field of
 * struct homa_common_hdr. See the homa_xxx_hdr structs below for more
 * information about each type.
 */
enum homa_packet_type {
DATA = 0x10,
GRANT = 0x11,
RESEND = 0x12,
UNKNOWN = 0x13,
BUSY = 0x14,
CUTOFFS = 0x15,
FREEZE = 0x16,
NEED_ACK = 0x17,
ACK = 0x18,
BOGUS = 0x19, /* Used only in unit tests. */
/* If you add a new type here, you must also do the following:
 * 1. Change BOGUS so that it is still the highest opcode.
 * 2. Add support for the new opcode in homa_print_packet,
 *    homa_print_packet_short, homa_symbol_for_type, and mock_skb_new.
 * 3. Add the header length to header_lengths in homa_plumbing.c.
 */
};
/** define HOMA_IPV6_HEADER_LENGTH - Size of an IP header (V6), in bytes. */
#define HOMA_IPV6_HEADER_LENGTH 40
/** define HOMA_IPV4_HEADER_LENGTH - Size of an IP header (V4), in bytes. */
#define HOMA_IPV4_HEADER_LENGTH 20
/**
 * define HOMA_SKB_EXTRA - How many bytes of additional space to allow at the
 * beginning of each sk_buff, before the IP header. This includes room for a
 * VLAN header and also includes some extra space, "just to be safe" (not
 * really sure if this is needed).
 */
#define HOMA_SKB_EXTRA 40
/**
 * define HOMA_ETH_OVERHEAD - Number of bytes per Ethernet packet for Ethernet
 * header, CRC, preamble, and inter-packet gap.
 */
#define HOMA_ETH_OVERHEAD 42
/**
 * define HOMA_MIN_PKT_LENGTH - Every Homa packet must be padded to at least
 * this length to meet Ethernet frame size limitations. This number includes
 * Homa headers and data, but not IP or Ethernet headers. A _Static_assert
 * later in this file checks that homa_data_hdr is at least this large.
 */
#define HOMA_MIN_PKT_LENGTH 26
/**
 * define HOMA_MAX_HEADER - Upper bound, in bytes, on the size of any Homa
 * packet header. Each packet header struct in this file is checked against
 * this value with a _Static_assert.
 */
#define HOMA_MAX_HEADER 90
/**
 * define HOMA_MAX_PRIORITIES - The maximum number of priority levels that
 * Homa can use (the actual number can be restricted to less than this at
 * runtime). Changing this value will affect packet formats (it determines
 * the size of @unsched_cutoffs in struct homa_cutoffs_hdr).
 */
#define HOMA_MAX_PRIORITIES 8
/**
 * struct homa_common_hdr - Wire format for the first bytes in every Homa
 * packet. This must (mostly) match the format of a TCP header to enable
 * Homa packets to actually be transmitted as TCP packets (and thereby
 * take advantage of TSO and other features).
 */
struct homa_common_hdr {
/**
 * @sport: Port on source machine from which packet was sent.
 * Must be in the same position as in a TCP header.
 */
__be16 sport;
/**
 * @dport: Port on destination that is to receive packet. Must be
 * in the same position as in a TCP header.
 */
__be16 dport;
/**
 * @sequence: Corresponds to the sequence number field in TCP headers;
 * used in DATA packets to hold the offset in the message of the first
 * byte of data. However, when TSO is used without TCP hijacking, this
 * value will only be correct in the first segment of a GSO packet.
 */
__be32 sequence;
/* The three fields below together occupy the bytes of the
 * acknowledgment field in TCP headers. Homa does not use them, except
 * for the low-order 8 bits (@type).
 */
/** @ack1: Unused by Homa (part of the TCP acknowledgment field). */
__be16 ack1;
/** @ack2: Unused by Homa (part of the TCP acknowledgment field). */
__u8 ack2;
/**
 * @type: Homa packet type (one of the values in the homa_packet_type
 * enum); occupies the low-order 8 bits of the TCP acknowledgment field.
 */
__u8 type;
/**
 * @doff: High-order 4 bits hold the number of 4-byte chunks in a
 * homa_data_hdr (low-order bits unused). Used only for DATA packets;
 * must be in the same position as the data offset in a TCP header.
 * Used by TSO to determine where the replicated header portion ends.
 */
__u8 doff;
/**
 * @flags: Holds TCP flags such as URG, ACK, etc. The special value
 * HOMA_TCP_FLAGS is stored here to distinguish Homa-over-TCP packets
 * from real TCP packets. It includes the SYN and RST flags,
 * which TCP would never use together; must not include URG or FIN
 * (TSO will turn off FIN for all but the last segment).
 */
__u8 flags;
#define HOMA_TCP_FLAGS 6
/**
 * @window: Corresponds to the window field in TCP headers. Not used
 * by Homa.
 */
__be16 window;
/**
 * @checksum: Not used by Homa, but must occupy the same bytes as
 * the checksum in a TCP header (TSO may modify this?).
 */
__be16 checksum;
/**
 * @urgent: Occupies the same bytes as the urgent pointer in a TCP
 * header. When Homa packets are transmitted over TCP, this has the
 * special value HOMA_TCP_URGENT (which is set even though URG is
 * not set) to indicate that the packet is actually a Homa packet.
 */
__be16 urgent;
#define HOMA_TCP_URGENT 0xb97d
/**
 * @sender_id: The identifier of this RPC as used on the sender (i.e.,
 * if the low-order bit is set, then the sender is the server for
 * this RPC). Use homa_local_id() to convert this to the id used on
 * the receiving machine.
 */
__be64 sender_id;
} __packed;
/**
 * struct homa_ack - Identifies an RPC that can be safely deleted by its
 * server.
 *
 * After sending the response for an RPC, the server must retain its
 * state for the RPC until it knows that the client has successfully
 * received the entire response. An ack indicates this. Clients will
 * piggyback acks on future data packets (see @ack in struct
 * homa_data_hdr), but if a client doesn't send any data to the server,
 * the server will eventually request an ack explicitly with a NEED_ACK
 * packet, in which case the client will return an explicit ACK.
 */
struct homa_ack {
/**
 * @client_id: The client's identifier for the RPC. 0 means this ack
 * is invalid.
 */
__be64 client_id;
/** @server_port: The server-side port for the RPC. */
__be16 server_port;
} __packed;
/* Layout of DATA packets (struct homa_data_hdr and struct homa_seg_hdr).
 *
 * A DATA packet contains data for part or all of a Homa message.
 * An incoming packet consists of a homa_data_hdr followed by message data.
 * An outgoing packet can have this simple format as well, or it can be
 * structured as a GSO packet. Homa supports two different formats for GSO
 * packets, depending on whether TCP hijacking is enabled:
 *
 *    No hijacking:                           TCP hijacking:
 *
 *    |-----------------------|               |-----------------------|
 *    |                       |               |                       |
 *    |     homa_data_hdr     |               |     homa_data_hdr     |
 *    |                       |               |                       |
 *    |-----------------------|               |-----------------------|
 *    |                       |               |                       |
 *    |                       |               |                       |
 *    |     segment data      |               |     segment data      |
 *    |                       |               |                       |
 *    |                       |               |                       |
 *    |-----------------------|               |-----------------------|
 *    |     homa_seg_hdr      |               |                       |
 *    |-----------------------|               |                       |
 *    |                       |               |     segment data      |
 *    |                       |               |                       |
 *    |     segment data      |               |                       |
 *    |                       |               |-----------------------|
 *    |                       |               |                       |
 *    |-----------------------|               |                       |
 *    |     homa_seg_hdr      |               |     segment data      |
 *    |-----------------------|               |                       |
 *    |                       |               |                       |
 *    |                       |               |-----------------------|
 *    |     segment data      |
 *    |                       |
 *    |                       |
 *    |-----------------------|
 *
 * With TCP hijacking, TSO will automatically adjust @common.sequence in
 * the segments, so that value can be used as the offset of the data within
 * the message. Without TCP hijacking, TSO will not adjust @common.sequence
 * in the segments, so Homa sprinkles correct offsets (in homa_seg_hdrs)
 * throughout the segment data; TSO/GSO will include a different homa_seg_hdr
 * in each generated packet.
 */
/**
 * struct homa_seg_hdr - Per-segment header that records where a segment's
 * data belongs within its message. The first one is the @seg field at the
 * end of struct homa_data_hdr.
 */
struct homa_seg_hdr {
/**
 * @offset: Offset within message of the first byte of data in
 * this segment. If this field is -1 it means that the packet was
 * generated by GSO with TCP hijacking. In this case the true offset
 * is in @common.sequence. homa_gro_receive detects this situation
 * and updates this value from @common.sequence if needed, so the
 * value will always be valid once the packet reaches homa_softirq.
 */
__be32 offset;
} __packed;
/**
 * struct homa_data_hdr - Wire format for the header of a DATA packet, which
 * carries data for part or all of a Homa message. The header is followed
 * by message data.
 */
struct homa_data_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
/** @message_length: Total #bytes in the message. */
__be32 message_length;
/**
 * @incoming: The receiver can expect the sender to send all of the
 * bytes in the message up to at least this offset (exclusive),
 * even without additional grants. This includes unscheduled
 * bytes, granted bytes, plus any additional bytes the sender
 * transmits unilaterally (e.g., to round up to a full GSO batch).
 */
__be32 incoming;
/**
 * @ack: If the @client_id field of this is nonzero, provides info
 * about an RPC that the recipient can now safely free. Note: in
 * TSO packets this will get duplicated in each of the segments;
 * in order to avoid repeated attempts to ack the same RPC,
 * homa_gro_receive will clear this field in all segments but the
 * first.
 */
struct homa_ack ack;
/**
 * @cutoff_version: The cutoff_version from the most recent
 * CUTOFFS packet that the source of this packet has received
 * from the destination of this packet, or 0 if the source hasn't
 * yet received a CUTOFFS packet.
 */
__be16 cutoff_version;
/**
 * @retransmit: 1 means this packet was sent in response to a RESEND
 * (it has already been sent previously).
 */
__u8 retransmit;
/**
 * @pad: Unused padding. Presumably present so that the part of the
 * header preceding @seg is a multiple of 4 bytes, which the
 * _Static_assert following this struct requires.
 */
char pad[3];
/** @seg: First of possibly many segments. */
struct homa_seg_hdr seg;
} __packed;
/* Compile-time checks on the size of struct homa_data_hdr. */
_Static_assert(sizeof(struct homa_data_hdr) <= HOMA_MAX_HEADER,
	       "homa_data_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
_Static_assert(sizeof(struct homa_data_hdr) >= HOMA_MIN_PKT_LENGTH,
	       "homa_data_hdr too small: Homa doesn't currently have code to pad data packets");
/* The portion of the header that precedes the trailing homa_seg_hdr must
 * be a whole number of 4-byte words.
 */
_Static_assert(((sizeof(struct homa_data_hdr) - sizeof(struct homa_seg_hdr)) &
		0x3) == 0,
	       "homa_data_hdr length not a multiple of 4 bytes (required for TCP/TSO compatibility)");
/**
 * homa_data_len() - Compute how many bytes of a DATA packet follow its
 * homa_data_hdr. Note: for a GSO packet the result may count metadata
 * in addition to message data.
 * @skb:     Incoming data packet.
 *
 * Return:   skb->len, minus the packet's transport offset, minus the size
 *           of struct homa_data_hdr.
 */
static inline int homa_data_len(struct sk_buff *skb)
{
	const size_t hdr_bytes = sizeof(struct homa_data_hdr);

	return skb->len - skb_transport_offset(skb) - hdr_bytes;
}
/**
 * struct homa_grant_hdr - Wire format for GRANT packets, which are sent by
 * the receiver back to the sender to indicate that the sender may transmit
 * additional bytes in the message.
 */
struct homa_grant_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
/**
 * @offset: Byte offset within the message.
 *
 * The sender should now transmit all data up to (but not including)
 * this offset ASAP, if it hasn't already.
 */
__be32 offset;
/**
 * @priority: The sender should use this priority level for all future
 * DATA packets for this message, until a GRANT is received
 * with higher offset. Larger numbers indicate higher priorities.
 */
__u8 priority;
/**
 * @resend_all: Nonzero means that the sender should resend all
 * previously transmitted data, starting at the beginning of the
 * message (assume that no packets have been successfully received).
 */
__u8 resend_all;
} __packed;
_Static_assert(sizeof(struct homa_grant_hdr) <= HOMA_MAX_HEADER,
"homa_grant_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
/**
 * struct homa_resend_hdr - Wire format for RESEND packets.
 *
 * A RESEND is sent by the receiver of a message when it believes that
 * message data may have been lost in transmission (or if it is concerned
 * that the sender may have crashed). The recipient of the RESEND (i.e.,
 * the sender of the message) should retransmit the specified portion of
 * the message, even if it already sent it previously.
 */
struct homa_resend_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
/**
 * @offset: Offset within the message of the first byte of data that
 * should be retransmitted.
 */
__be32 offset;
/**
 * @length: Number of bytes of data to retransmit; this could specify
 * a range longer than the total message size. Zero is a special case
 * used by servers; in this case, there is no need to actually resend
 * anything; the purpose of this packet is to trigger an UNKNOWN
 * response if the client no longer cares about this RPC.
 */
__be32 length;
/**
 * @priority: Packet priority to use.
 *
 * The sender should transmit all the requested data using this
 * priority.
 */
__u8 priority;
} __packed;
_Static_assert(sizeof(struct homa_resend_hdr) <= HOMA_MAX_HEADER,
"homa_resend_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
/**
 * struct homa_unknown_hdr - Wire format for UNKNOWN packets.
 *
 * An UNKNOWN packet is sent by either server or client when it receives a
 * packet for an RPC that is unknown to it. When a client receives an
 * UNKNOWN packet it will typically restart the RPC from the beginning;
 * when a server receives an UNKNOWN packet it will typically discard its
 * state for the RPC.
 *
 * The packet carries no fields beyond the common header.
 */
struct homa_unknown_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
} __packed;
_Static_assert(sizeof(struct homa_unknown_hdr) <= HOMA_MAX_HEADER,
"homa_unknown_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
/**
 * struct homa_busy_hdr - Wire format for BUSY packets.
 *
 * These packets tell the recipient that the sender is still alive (even if
 * it isn't sending data expected by the recipient).
 *
 * The packet carries no fields beyond the common header.
 */
struct homa_busy_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
} __packed;
_Static_assert(sizeof(struct homa_busy_hdr) <= HOMA_MAX_HEADER,
"homa_busy_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
/**
 * struct homa_cutoffs_hdr - Wire format for CUTOFFS packets.
 *
 * These packets tell the recipient how to assign priorities to
 * unscheduled packets.
 */
struct homa_cutoffs_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
/**
 * @unsched_cutoffs: Priorities to use for unscheduled packets
 * sent to the sender of this packet; one entry for each of the
 * HOMA_MAX_PRIORITIES priority levels. See documentation for
 * @homa.unsched_cutoffs for the meanings of these values.
 */
__be32 unsched_cutoffs[HOMA_MAX_PRIORITIES];
/**
 * @cutoff_version: Unique identifier associated with @unsched_cutoffs.
 * Must be included in future DATA packets sent to the sender of
 * this packet (see @cutoff_version in struct homa_data_hdr).
 */
__be16 cutoff_version;
} __packed;
_Static_assert(sizeof(struct homa_cutoffs_hdr) <= HOMA_MAX_HEADER,
"homa_cutoffs_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
/**
 * struct homa_freeze_hdr - Wire format for FREEZE packets.
 *
 * These packets tell the recipient to freeze its timetrace; used
 * for debugging.
 *
 * The packet carries no fields beyond the common header.
 */
struct homa_freeze_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
} __packed;
_Static_assert(sizeof(struct homa_freeze_hdr) <= HOMA_MAX_HEADER,
"homa_freeze_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
/**
 * struct homa_need_ack_hdr - Wire format for NEED_ACK packets.
 *
 * These packets ask the recipient (a client) to return an ACK message if
 * the packet's RPC is no longer active.
 *
 * The packet carries no fields beyond the common header.
 */
struct homa_need_ack_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
} __packed;
_Static_assert(sizeof(struct homa_need_ack_hdr) <= HOMA_MAX_HEADER,
"homa_need_ack_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
/**
 * struct homa_ack_hdr - Wire format for ACK packets.
 *
 * These packets are sent from a client to a server to indicate that
 * a set of RPCs is no longer active on the client, so the server can
 * free any state it may have for them.
 */
struct homa_ack_hdr {
/** @common: Fields common to all packet types. */
struct homa_common_hdr common;
/** @num_acks: Number of (leading) elements in @acks that are valid. */
__be16 num_acks;
/* Capacity of the @acks array, i.e. the most acks one ACK packet can hold. */
#define HOMA_MAX_ACKS_PER_PKT 5
/** @acks: Info about RPCs that are no longer active. */
struct homa_ack acks[HOMA_MAX_ACKS_PER_PKT];
} __packed;
_Static_assert(sizeof(struct homa_ack_hdr) <= HOMA_MAX_HEADER,
"homa_ack_hdr too large for HOMA_MAX_HEADER; must adjust HOMA_MAX_HEADER");
/**
 * homa_local_id() - Translate the RPC identifier carried in an incoming
 * packet into the identifier this machine uses for the same RPC.
 * @sender_id:  RPC id from an incoming packet (network byte order), such
 *              as h->common.sender_id.
 *
 * Return:      The id in host byte order with its low-order bit inverted.
 */
static inline __u64 homa_local_id(__be64 sender_id)
{
	__u64 host_order_id = be64_to_cpu(sender_id);

	/* The low-order bit distinguishes client from server, and the
	 * two ends of an RPC play opposite roles, so flip it: if the
	 * sender had the bit set it must be clear here, and vice versa.
	 */
	return host_order_id ^ 1;
}
#endif /* _HOMA_WIRE_H */