2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include "netdev-dpdk.h"
31 enum OVS_PACKED_ENUM dp_packet_source {
32 DPBUF_MALLOC, /* Obtained via malloc(). */
33 DPBUF_STACK, /* Un-movable stack space or static buffer. */
34 DPBUF_STUB, /* Starts on stack, may expand into heap. */
35 DPBUF_DPDK, /* buffer data is from DPDK allocated memory.
36 ref to build_dp_packet() in netdev-dpdk. */
/* Buffer for holding arbitrary data.  A dp_packet is automatically reallocated
 * as necessary if it grows too large for the available memory.
 *
 * 'frame' and offset conventions:
 *
 * Network frames (aka "packets"): 'frame' MUST be set to the start of the
 *    packet, layer offsets MAY be set as appropriate for the packet.
 *    Additionally, we assume in many places that the 'frame' and 'data' are
 *    the same for packets.
 *
 * OpenFlow messages: 'frame' points to the start of the OpenFlow
 *    header, while 'l3_ofs' is the length of the OpenFlow header.
 *    When parsing, the 'data' will move past these, as data is being
 *    pulled from the OpenFlow message.
 *
 * Actions: When encoding OVS action lists, the 'frame' is used
 *    as a pointer to the beginning of the current action (see ofpact_put()).
 *
 * rconn: Reuses 'frame' as a private pointer while queuing.
 */
61 struct rte_mbuf mbuf; /* DPDK mbuf */
63 void *base_; /* First byte of allocated space. */
64 uint16_t data_ofs; /* First byte actually in use. */
65 uint32_t size_; /* Number of bytes in use. */
66 uint32_t rss_hash; /* Packet hash. */
68 uint32_t allocated; /* Number of bytes allocated. */
70 void *frame; /* Packet frame start, or NULL. */
71 enum dp_packet_source source; /* Source of memory allocated as 'base'. */
72 uint8_t l2_pad_size; /* Detected l2 padding size.
73 * Padding is non-pullable. */
74 uint16_t l2_5_ofs; /* MPLS label stack offset from 'frame', or
76 uint16_t l3_ofs; /* Network-level header offset from 'frame',
78 uint16_t l4_ofs; /* Transport-level header offset from 'frame',
80 struct pkt_metadata md;
83 static inline void * dp_packet_data(const struct dp_packet *);
84 static inline void dp_packet_set_data(struct dp_packet *, void *);
85 static inline void * dp_packet_base(const struct dp_packet *);
86 static inline void dp_packet_set_base(struct dp_packet *, void *);
88 static inline uint32_t dp_packet_size(const struct dp_packet *);
89 static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
91 void * dp_packet_resize_l2(struct dp_packet *, int increment);
92 void * dp_packet_resize_l2_5(struct dp_packet *, int increment);
93 static inline void * dp_packet_l2(const struct dp_packet *);
94 static inline void dp_packet_set_frame(struct dp_packet *, void *);
95 static inline uint8_t dp_packet_l2_pad_size(const struct dp_packet *);
96 static inline void dp_packet_set_l2_pad_size(struct dp_packet *, uint8_t);
97 static inline void * dp_packet_l2_5(const struct dp_packet *);
98 static inline void dp_packet_set_l2_5(struct dp_packet *, void *);
99 static inline void * dp_packet_l3(const struct dp_packet *);
100 static inline void dp_packet_set_l3(struct dp_packet *, void *);
101 static inline void * dp_packet_l4(const struct dp_packet *);
102 static inline void dp_packet_set_l4(struct dp_packet *, void *);
103 static inline size_t dp_packet_l4_size(const struct dp_packet *);
104 static inline const void *dp_packet_get_tcp_payload(const struct dp_packet *);
105 static inline const void *dp_packet_get_udp_payload(const struct dp_packet *);
106 static inline const void *dp_packet_get_sctp_payload(const struct dp_packet *);
107 static inline const void *dp_packet_get_icmp_payload(const struct dp_packet *);
108 static inline const void *dp_packet_get_nd_payload(const struct dp_packet *);
110 void dp_packet_use(struct dp_packet *, void *, size_t);
111 void dp_packet_use_stub(struct dp_packet *, void *, size_t);
112 void dp_packet_use_const(struct dp_packet *, const void *, size_t);
114 void dp_packet_init_dpdk(struct dp_packet *b, size_t allocated);
116 void dp_packet_init(struct dp_packet *, size_t);
117 void dp_packet_uninit(struct dp_packet *);
118 static inline void *dp_packet_get_uninit_pointer(struct dp_packet *);
120 struct dp_packet *dp_packet_new(size_t);
121 struct dp_packet *dp_packet_new_with_headroom(size_t, size_t headroom);
122 struct dp_packet *dp_packet_clone(const struct dp_packet *);
123 struct dp_packet *dp_packet_clone_with_headroom(const struct dp_packet *,
125 struct dp_packet *dp_packet_clone_data(const void *, size_t);
126 struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t,
128 static inline void dp_packet_delete(struct dp_packet *);
130 static inline void *dp_packet_at(const struct dp_packet *, size_t offset,
132 static inline void *dp_packet_at_assert(const struct dp_packet *, size_t offset,
134 static inline void *dp_packet_tail(const struct dp_packet *);
135 static inline void *dp_packet_end(const struct dp_packet *);
137 void *dp_packet_put_uninit(struct dp_packet *, size_t);
138 void *dp_packet_put_zeros(struct dp_packet *, size_t);
139 void *dp_packet_put(struct dp_packet *, const void *, size_t);
140 char *dp_packet_put_hex(struct dp_packet *, const char *s, size_t *n);
141 void dp_packet_reserve(struct dp_packet *, size_t);
142 void dp_packet_reserve_with_tailroom(struct dp_packet *b, size_t headroom,
144 void *dp_packet_push_uninit(struct dp_packet *b, size_t);
145 void *dp_packet_push_zeros(struct dp_packet *, size_t);
146 void *dp_packet_push(struct dp_packet *b, const void *, size_t);
148 static inline size_t dp_packet_headroom(const struct dp_packet *);
149 static inline size_t dp_packet_tailroom(const struct dp_packet *);
150 void dp_packet_prealloc_headroom(struct dp_packet *, size_t);
151 void dp_packet_prealloc_tailroom(struct dp_packet *, size_t);
152 void dp_packet_shift(struct dp_packet *, int);
154 static inline void dp_packet_clear(struct dp_packet *);
155 static inline void *dp_packet_pull(struct dp_packet *, size_t);
156 static inline void *dp_packet_try_pull(struct dp_packet *, size_t);
158 void *dp_packet_steal_data(struct dp_packet *);
160 char *dp_packet_to_string(const struct dp_packet *, size_t maxbytes);
161 static inline bool dp_packet_equal(const struct dp_packet *, const struct dp_packet *);
164 /* Returns a pointer that may be passed to free() to accomplish the same thing
165 * as dp_packet_uninit(b). The return value is a null pointer if dp_packet_uninit()
166 * would not free any memory. */
167 static inline void *dp_packet_get_uninit_pointer(struct dp_packet *b)
169 /* XXX: If 'source' is DPBUF_DPDK memory gets leaked! */
170 return b && b->source == DPBUF_MALLOC ? dp_packet_base(b) : NULL;
173 /* Frees memory that 'b' points to, as well as 'b' itself. */
174 static inline void dp_packet_delete(struct dp_packet *b)
177 if (b->source == DPBUF_DPDK) {
178 /* If this dp_packet was allocated by DPDK it must have been
179 * created as a dp_packet */
180 free_dpdk_buf((struct dp_packet*) b);
/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
 * byte 'offset'.  Otherwise, returns a null pointer.
 *
 * The bounds check is phrased so that it cannot wrap around: an 'offset' and
 * 'size' whose sum would overflow size_t are rejected instead of being
 * mistaken for a small in-range value. */
static inline void *dp_packet_at(const struct dp_packet *b, size_t offset,
                                 size_t size)
{
    size_t in_use = dp_packet_size(b);

    /* Equivalent to 'offset + size <= in_use', minus the overflow hazard. */
    if (offset > in_use || size > in_use - offset) {
        return NULL;
    }
    return (char *) dp_packet_data(b) + offset;
}
/* Returns a pointer to byte 'offset' in 'b', which must contain at least
 * 'offset + size' bytes of data; the requirement is enforced with
 * ovs_assert().
 *
 * The asserted condition is phrased so that it cannot wrap around: an
 * 'offset' and 'size' whose sum would overflow size_t fail the assertion
 * instead of slipping past it. */
static inline void *dp_packet_at_assert(const struct dp_packet *b, size_t offset,
                                        size_t size)
{
    size_t in_use = dp_packet_size(b);

    /* Equivalent to 'offset + size <= in_use', minus the overflow hazard. */
    ovs_assert(offset <= in_use && size <= in_use - offset);
    return (char *) dp_packet_data(b) + offset;
}
/* Returns the address one past the final in-use data byte of 'b', i.e. the
 * data pointer advanced by the number of bytes in use. */
static inline void *dp_packet_tail(const struct dp_packet *b)
{
    char *first_used = dp_packet_data(b);

    return first_used + dp_packet_size(b);
}
212 /* Returns a pointer to byte following the last byte allocated for use (but
213 * not necessarily in use) in 'b'. */
214 static inline void *dp_packet_end(const struct dp_packet *b)
216 return (char *) dp_packet_base(b) + b->allocated;
/* Returns the headroom of 'b': the count of unused bytes that sit between the
 * start of the allocated space and the first byte of data in use.  When the
 * data begins at the very start of the buffer the headroom is 0. */
static inline size_t dp_packet_headroom(const struct dp_packet *b)
{
    char *first_used = dp_packet_data(b);
    char *first_allocated = dp_packet_base(b);

    return first_used - first_allocated;
}
/* Returns the tailroom of 'b': how many bytes can still be appended after the
 * data in use before the dp_packet would have to be reallocated. */
static inline size_t dp_packet_tailroom(const struct dp_packet *b)
{
    char *past_allocated = dp_packet_end(b);
    char *past_used = dp_packet_tail(b);

    return past_allocated - past_used;
}
/* Discards all data held in 'b': the data pointer is rewound to the start of
 * the buffer and the in-use size becomes 0. */
static inline void dp_packet_clear(struct dp_packet *b)
{
    void *start = dp_packet_base(b);

    dp_packet_set_data(b, start);
    dp_packet_set_size(b, 0);
}
/* Strips 'size' bytes off the head end of 'b' and returns a pointer to the
 * first byte stripped.  'b' must hold at least 'size' bytes of data once its
 * l2 padding is excluded; this is enforced with ovs_assert(). */
static inline void *dp_packet_pull(struct dp_packet *b, size_t size)
{
    char *head = dp_packet_data(b);

    ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= size);
    dp_packet_set_data(b, head + size);
    dp_packet_set_size(b, dp_packet_size(b) - size);
    return head;
}
/* Non-asserting counterpart of dp_packet_pull(): when 'b' holds at least
 * 'size' bytes of data (l2 padding excluded), strips them off the head end
 * and returns the first byte stripped.  Otherwise leaves 'b' untouched and
 * returns a null pointer. */
static inline void *dp_packet_try_pull(struct dp_packet *b, size_t size)
{
    if (dp_packet_size(b) - dp_packet_l2_pad_size(b) < size) {
        return NULL;
    }
    return dp_packet_pull(b, size);
}
/* Returns true if 'a' and 'b' hold the same number of bytes of data and those
 * bytes compare equal, false otherwise.  Only the data in use is compared;
 * offsets and metadata are ignored. */
static inline bool dp_packet_equal(const struct dp_packet *a, const struct dp_packet *b)
{
    uint32_t len = dp_packet_size(a);

    if (len != dp_packet_size(b)) {
        return false;
    }

    /* Two empty buffers are trivially equal.  Returning early also keeps the
     * null pointer that dp_packet_data() can return away from memcmp(), which
     * has undefined behavior for null arguments even with a zero length. */
    if (!len) {
        return true;
    }

    return memcmp(dp_packet_data(a), dp_packet_data(b), len) == 0;
}
268 /* Get the start if the Ethernet frame. 'l3_ofs' marks the end of the l2
269 * headers, so return NULL if it is not set. */
270 static inline void * dp_packet_l2(const struct dp_packet *b)
272 return (b->l3_ofs != UINT16_MAX) ? b->frame : NULL;
275 /* Sets the packet frame start pointer and resets all layer offsets.
276 * l3 offset must be set before 'l2' can be retrieved. */
277 static inline void dp_packet_set_frame(struct dp_packet *b, void *packet)
281 b->l2_5_ofs = UINT16_MAX;
282 b->l3_ofs = UINT16_MAX;
283 b->l4_ofs = UINT16_MAX;
286 static inline uint8_t dp_packet_l2_pad_size(const struct dp_packet *b)
288 return b->l2_pad_size;
291 static inline void dp_packet_set_l2_pad_size(struct dp_packet *b, uint8_t pad_size)
293 ovs_assert(pad_size <= dp_packet_size(b));
294 b->l2_pad_size = pad_size;
297 static inline void * dp_packet_l2_5(const struct dp_packet *b)
299 return b->l2_5_ofs != UINT16_MAX ? (char *)b->frame + b->l2_5_ofs : NULL;
302 static inline void dp_packet_set_l2_5(struct dp_packet *b, void *l2_5)
304 b->l2_5_ofs = l2_5 ? (char *)l2_5 - (char *)b->frame : UINT16_MAX;
307 static inline void * dp_packet_l3(const struct dp_packet *b)
309 return b->l3_ofs != UINT16_MAX ? (char *)b->frame + b->l3_ofs : NULL;
312 static inline void dp_packet_set_l3(struct dp_packet *b, void *l3)
314 b->l3_ofs = l3 ? (char *)l3 - (char *)b->frame : UINT16_MAX;
317 static inline void * dp_packet_l4(const struct dp_packet *b)
319 return b->l4_ofs != UINT16_MAX ? (char *)b->frame + b->l4_ofs : NULL;
322 static inline void dp_packet_set_l4(struct dp_packet *b, void *l4)
324 b->l4_ofs = l4 ? (char *)l4 - (char *)b->frame : UINT16_MAX;
327 static inline size_t dp_packet_l4_size(const struct dp_packet *b)
329 return b->l4_ofs != UINT16_MAX
330 ? (const char *)dp_packet_tail(b) - (const char *)dp_packet_l4(b)
331 - dp_packet_l2_pad_size(b)
335 static inline const void *dp_packet_get_tcp_payload(const struct dp_packet *b)
337 size_t l4_size = dp_packet_l4_size(b);
339 if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) {
340 struct tcp_header *tcp = dp_packet_l4(b);
341 int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
343 if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) {
344 return (const char *)tcp + tcp_len;
350 static inline const void *dp_packet_get_udp_payload(const struct dp_packet *b)
352 return OVS_LIKELY(dp_packet_l4_size(b) >= UDP_HEADER_LEN)
353 ? (const char *)dp_packet_l4(b) + UDP_HEADER_LEN : NULL;
356 static inline const void *dp_packet_get_sctp_payload(const struct dp_packet *b)
358 return OVS_LIKELY(dp_packet_l4_size(b) >= SCTP_HEADER_LEN)
359 ? (const char *)dp_packet_l4(b) + SCTP_HEADER_LEN : NULL;
362 static inline const void *dp_packet_get_icmp_payload(const struct dp_packet *b)
364 return OVS_LIKELY(dp_packet_l4_size(b) >= ICMP_HEADER_LEN)
365 ? (const char *)dp_packet_l4(b) + ICMP_HEADER_LEN : NULL;
368 static inline const void *dp_packet_get_nd_payload(const struct dp_packet *b)
370 return OVS_LIKELY(dp_packet_l4_size(b) >= ND_MSG_LEN)
371 ? (const char *)dp_packet_l4(b) + ND_MSG_LEN : NULL;
375 BUILD_ASSERT_DECL(offsetof(struct dp_packet, mbuf) == 0);
377 static inline void * dp_packet_base(const struct dp_packet *b)
379 return b->mbuf.buf_addr;
382 static inline void dp_packet_set_base(struct dp_packet *b, void *d)
384 b->mbuf.buf_addr = d;
387 static inline uint32_t dp_packet_size(const struct dp_packet *b)
389 return b->mbuf.pkt_len;
392 static inline void dp_packet_set_size(struct dp_packet *b, uint32_t v)
394 /* netdev-dpdk does not currently support segmentation; consequently, for
395 * all intents and purposes, 'data_len' (16 bit) and 'pkt_len' (32 bit) may
396 * be used interchangably.
398 * On the datapath, it is expected that the size of packets
399 * (and thus 'v') will always be <= UINT16_MAX; this means that there is no
400 * loss of accuracy in assigning 'v' to 'data_len'.
402 * However, control ofpbufs may well be larger than UINT16_MAX (i.e. 'v' >
403 * UINT16_MAX); even though the value is truncated when assigned to
404 * 'data_len', loss of accuracy is avoided in this situation by using
405 * 'pkt_len' to represent the packet size.
407 b->mbuf.data_len = (uint16_t)v; /* Current seg length. */
408 b->mbuf.pkt_len = v; /* Total length of all segments linked to
413 static inline uint16_t __packet_data(const struct dp_packet *b)
415 return b->mbuf.data_off;
418 static inline void __packet_set_data(struct dp_packet *b, uint16_t v)
420 b->mbuf.data_off = v;
424 static inline void * dp_packet_base(const struct dp_packet *b)
429 static inline void dp_packet_set_base(struct dp_packet *b, void *d)
434 static inline uint32_t dp_packet_size(const struct dp_packet *b)
439 static inline void dp_packet_set_size(struct dp_packet *b, uint32_t v)
444 static inline uint16_t __packet_data(const struct dp_packet *b)
449 static inline void __packet_set_data(struct dp_packet *b, uint16_t v)
/* Returns a pointer to the first byte of data in use in 'b': the base pointer
 * advanced by the stored data offset.  A stored offset of UINT16_MAX means
 * "no data pointer", in which case a null pointer is returned. */
static inline void * dp_packet_data(const struct dp_packet *b)
{
    uint16_t ofs = __packet_data(b);

    if (ofs == UINT16_MAX) {
        return NULL;
    }
    return (char *) dp_packet_base(b) + ofs;
}
462 static inline void dp_packet_set_data(struct dp_packet *b, void *data)
465 __packet_set_data(b, (char *)data - (char *)dp_packet_base(b));
467 __packet_set_data(b, UINT16_MAX);
471 static inline void dp_packet_reset_packet(struct dp_packet *b, int off)
473 dp_packet_set_size(b, dp_packet_size(b) - off);
474 dp_packet_set_data(b, (void *) ((unsigned char *) b->frame + off));
476 b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX;
479 static inline uint32_t dp_packet_get_rss_hash(struct dp_packet *p)
482 return p->mbuf.hash.rss;
488 static inline void dp_packet_set_rss_hash(struct dp_packet *p,
492 p->mbuf.hash.rss = hash;
502 #endif /* dp-packet.h */