net_pkt: Extend packet abstraction as required by e1000e functionality

This patch extends the TX/RX packet abstractions with features that will
be used by the e1000e device implementation.

Changes are:

  1. Support iovec lists for RX buffers
  2. Deeper RX packets parsing
  3. Loopback option for TX packets
  4. Extended VLAN headers handling
  5. RSS processing for RX packets

Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
This commit is contained in:
Dmitry Fleytman 2016-06-01 11:23:41 +03:00 committed by Jason Wang
parent 66409b7c8b
commit eb700029c7
9 changed files with 1346 additions and 215 deletions

View file

@ -95,12 +95,11 @@ void net_checksum_calculate(uint8_t *data, int length)
uint32_t
net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt,
uint32_t iov_off, uint32_t size)
uint32_t iov_off, uint32_t size, uint32_t csum_offset)
{
size_t iovec_off, buf_off;
unsigned int i;
uint32_t res = 0;
uint32_t seq = 0;
iovec_off = 0;
buf_off = 0;
@ -109,8 +108,8 @@ net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt,
size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size);
void *chunk_buf = iov[i].iov_base + (iov_off - iovec_off);
res += net_checksum_add_cont(len, chunk_buf, seq);
seq += len;
res += net_checksum_add_cont(len, chunk_buf, csum_offset);
csum_offset += len;
buf_off += len;
iov_off += len;

418
net/eth.c
View file

@ -21,8 +21,8 @@
#include "qemu-common.h"
#include "net/tap.h"
void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
bool *is_new)
void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
uint16_t vlan_ethtype, bool *is_new)
{
struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
@ -36,7 +36,7 @@ void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
default:
/* No VLAN header, put a new one */
vhdr->h_proto = ehdr->h_proto;
ehdr->h_proto = cpu_to_be16(ETH_P_VLAN);
ehdr->h_proto = cpu_to_be16(vlan_ethtype);
*is_new = true;
break;
}
@ -79,26 +79,100 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
}
void eth_get_protocols(const uint8_t *headers,
uint32_t hdr_length,
uint16_t
eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
{
uint16_t proto;
size_t copied;
size_t size = iov_size(l2hdr_iov, iovcnt);
size_t proto_offset = l2hdr_len - sizeof(proto);
if (size < proto_offset) {
return ETH_P_UNKNOWN;
}
copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
&proto, sizeof(proto));
return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
}
static bool
_eth_copy_chunk(size_t input_size,
const struct iovec *iov, int iovcnt,
size_t offset, size_t length,
void *buffer)
{
size_t copied;
if (input_size < offset) {
return false;
}
copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
if (copied < length) {
return false;
}
return true;
}
static bool
_eth_tcp_has_data(bool is_ip4,
const struct ip_header *ip4_hdr,
const struct ip6_header *ip6_hdr,
size_t full_ip6hdr_len,
const struct tcp_header *tcp)
{
uint32_t l4len;
if (is_ip4) {
l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
} else {
size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
}
return l4len > TCP_HEADER_DATA_OFFSET(tcp);
}
void eth_get_protocols(const struct iovec *iov, int iovcnt,
bool *isip4, bool *isip6,
bool *isudp, bool *istcp)
bool *isudp, bool *istcp,
size_t *l3hdr_off,
size_t *l4hdr_off,
size_t *l5hdr_off,
eth_ip6_hdr_info *ip6hdr_info,
eth_ip4_hdr_info *ip4hdr_info,
eth_l4_hdr_info *l4hdr_info)
{
int proto;
size_t l2hdr_len = eth_get_l2_hdr_length(headers);
assert(hdr_length >= eth_get_l2_hdr_length(headers));
bool fragment = false;
size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
size_t input_size = iov_size(iov, iovcnt);
size_t copied;
*isip4 = *isip6 = *isudp = *istcp = false;
proto = eth_get_l3_proto(headers, l2hdr_len);
proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
*l3hdr_off = l2hdr_len;
if (proto == ETH_P_IP) {
struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
if (input_size < l2hdr_len) {
return;
}
copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
*isip4 = true;
struct ip_header *iphdr;
assert(hdr_length >=
eth_get_l2_hdr_length(headers) + sizeof(struct ip_header));
iphdr = PKT_GET_IP_HDR(headers);
if (copied < sizeof(*iphdr)) {
return;
}
if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
if (iphdr->ip_p == IP_PROTO_TCP) {
@ -107,33 +181,149 @@ void eth_get_protocols(const uint8_t *headers,
*isudp = true;
}
}
} else if (proto == ETH_P_IPV6) {
uint8_t l4proto;
size_t full_ip6hdr_len;
struct iovec hdr_vec;
hdr_vec.iov_base = (void *) headers;
hdr_vec.iov_len = hdr_length;
ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
*l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
fragment = ip4hdr_info->fragment;
} else if (proto == ETH_P_IPV6) {
*isip6 = true;
if (eth_parse_ipv6_hdr(&hdr_vec, 1, l2hdr_len,
&l4proto, &full_ip6hdr_len)) {
if (l4proto == IP_PROTO_TCP) {
if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
ip6hdr_info)) {
if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
*istcp = true;
} else if (l4proto == IP_PROTO_UDP) {
} else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
*isudp = true;
}
} else {
return;
}
*l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
fragment = ip6hdr_info->fragment;
}
if (!fragment) {
if (*istcp) {
*istcp = _eth_copy_chunk(input_size,
iov, iovcnt,
*l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
&l4hdr_info->hdr.tcp);
if (istcp) {
*l5hdr_off = *l4hdr_off +
TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
l4hdr_info->has_tcp_data =
_eth_tcp_has_data(proto == ETH_P_IP,
&ip4hdr_info->ip4_hdr,
&ip6hdr_info->ip6_hdr,
*l4hdr_off - *l3hdr_off,
&l4hdr_info->hdr.tcp);
}
} else if (*isudp) {
*isudp = _eth_copy_chunk(input_size,
iov, iovcnt,
*l4hdr_off, sizeof(l4hdr_info->hdr.udp),
&l4hdr_info->hdr.udp);
*l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
}
}
}
bool
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
uint8_t *new_ehdr_buf,
uint16_t *payload_offset, uint16_t *tci)
{
struct vlan_header vlan_hdr;
struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
size_t copied = iov_to_buf(iov, iovcnt, iovoff,
new_ehdr, sizeof(*new_ehdr));
if (copied < sizeof(*new_ehdr)) {
return false;
}
switch (be16_to_cpu(new_ehdr->h_proto)) {
case ETH_P_VLAN:
case ETH_P_DVLAN:
copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
&vlan_hdr, sizeof(vlan_hdr));
if (copied < sizeof(vlan_hdr)) {
return false;
}
new_ehdr->h_proto = vlan_hdr.h_proto;
*tci = be16_to_cpu(vlan_hdr.h_tci);
*payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
copied = iov_to_buf(iov, iovcnt, *payload_offset,
PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
if (copied < sizeof(vlan_hdr)) {
return false;
}
*payload_offset += sizeof(vlan_hdr);
}
return true;
default:
return false;
}
}
bool
eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
uint16_t vet, uint8_t *new_ehdr_buf,
uint16_t *payload_offset, uint16_t *tci)
{
struct vlan_header vlan_hdr;
struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
size_t copied = iov_to_buf(iov, iovcnt, iovoff,
new_ehdr, sizeof(*new_ehdr));
if (copied < sizeof(*new_ehdr)) {
return false;
}
if (be16_to_cpu(new_ehdr->h_proto) == vet) {
copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
&vlan_hdr, sizeof(vlan_hdr));
if (copied < sizeof(vlan_hdr)) {
return false;
}
new_ehdr->h_proto = vlan_hdr.h_proto;
*tci = be16_to_cpu(vlan_hdr.h_tci);
*payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
return true;
}
return false;
}
void
eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
void *l3hdr, size_t l3hdr_len,
size_t l3payload_len,
size_t frag_offset, bool more_frags)
{
if (eth_get_l3_proto(l2hdr, l2hdr_len) == ETH_P_IP) {
const struct iovec l2vec = {
.iov_base = (void *) l2hdr,
.iov_len = l2hdr_len
};
if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
uint16_t orig_flags;
struct ip_header *iphdr = (struct ip_header *) l3hdr;
uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
@ -158,7 +348,9 @@ eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
}
uint32_t
eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl)
eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
uint16_t csl,
uint32_t *cso)
{
struct ip_pseudo_header ipph;
ipph.ip_src = iphdr->ip_src;
@ -166,7 +358,26 @@ eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl)
ipph.ip_payload = cpu_to_be16(csl);
ipph.ip_proto = iphdr->ip_p;
ipph.zeros = 0;
return net_checksum_add(sizeof(ipph), (uint8_t *) &ipph);
*cso = sizeof(ipph);
return net_checksum_add(*cso, (uint8_t *) &ipph);
}
uint32_t
eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
uint16_t csl,
uint8_t l4_proto,
uint32_t *cso)
{
struct ip6_pseudo_header ipph;
ipph.ip6_src = iphdr->ip6_src;
ipph.ip6_dst = iphdr->ip6_dst;
ipph.len = cpu_to_be16(csl);
ipph.zero[0] = 0;
ipph.zero[1] = 0;
ipph.zero[2] = 0;
ipph.next_hdr = l4_proto;
*cso = sizeof(ipph);
return net_checksum_add(*cso, (uint8_t *)&ipph);
}
static bool
@ -186,33 +397,152 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type)
}
}
bool eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags,
size_t ip6hdr_off, uint8_t *l4proto,
size_t *full_hdr_len)
static bool
_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
size_t rthdr_offset,
struct ip6_ext_hdr *ext_hdr,
struct in6_address *dst_addr)
{
struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
if ((rthdr->rtype == 2) &&
(rthdr->len == sizeof(struct in6_address) / 8) &&
(rthdr->segleft == 1)) {
size_t input_size = iov_size(pkt, pkt_frags);
size_t bytes_read;
if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
return false;
}
bytes_read = iov_to_buf(pkt, pkt_frags,
rthdr_offset + sizeof(*ext_hdr),
dst_addr, sizeof(dst_addr));
return bytes_read == sizeof(dst_addr);
}
return false;
}
static bool
_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
size_t dsthdr_offset,
struct ip6_ext_hdr *ext_hdr,
struct in6_address *src_addr)
{
size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
struct ip6_option_hdr opthdr;
size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
while (bytes_left > sizeof(opthdr)) {
size_t input_size = iov_size(pkt, pkt_frags);
size_t bytes_read, optlen;
if (input_size < opt_offset) {
return false;
}
bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
&opthdr, sizeof(opthdr));
if (bytes_read != sizeof(opthdr)) {
return false;
}
optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
: (opthdr.len + sizeof(opthdr));
if (optlen > bytes_left) {
return false;
}
if (opthdr.type == IP6_OPT_HOME) {
size_t input_size = iov_size(pkt, pkt_frags);
if (input_size < opt_offset + sizeof(opthdr)) {
return false;
}
bytes_read = iov_to_buf(pkt, pkt_frags,
opt_offset + sizeof(opthdr),
src_addr, sizeof(src_addr));
return bytes_read == sizeof(src_addr);
}
opt_offset += optlen;
bytes_left -= optlen;
}
return false;
}
bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
struct ip6_header ip6_hdr;
struct ip6_ext_hdr ext_hdr;
size_t bytes_read;
uint8_t curr_ext_hdr_type;
size_t input_size = iov_size(pkt, pkt_frags);
bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
&ip6_hdr, sizeof(ip6_hdr));
if (bytes_read < sizeof(ip6_hdr)) {
info->rss_ex_dst_valid = false;
info->rss_ex_src_valid = false;
info->fragment = false;
if (input_size < ip6hdr_off) {
return false;
}
*full_hdr_len = sizeof(struct ip6_header);
bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
&info->ip6_hdr, sizeof(info->ip6_hdr));
if (bytes_read < sizeof(info->ip6_hdr)) {
return false;
}
if (!eth_is_ip6_extension_header_type(ip6_hdr.ip6_nxt)) {
*l4proto = ip6_hdr.ip6_nxt;
info->full_hdr_len = sizeof(struct ip6_header);
curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
info->l4proto = info->ip6_hdr.ip6_nxt;
info->has_ext_hdrs = false;
return true;
}
do {
bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + *full_hdr_len,
&ext_hdr, sizeof(ext_hdr));
*full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
} while (eth_is_ip6_extension_header_type(ext_hdr.ip6r_nxt));
info->has_ext_hdrs = true;
*l4proto = ext_hdr.ip6r_nxt;
do {
if (input_size < ip6hdr_off + info->full_hdr_len) {
return false;
}
bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
&ext_hdr, sizeof(ext_hdr));
if (bytes_read < sizeof(ext_hdr)) {
return false;
}
if (curr_ext_hdr_type == IP6_ROUTING) {
info->rss_ex_dst_valid =
_eth_get_rss_ex_dst_addr(pkt, pkt_frags,
ip6hdr_off + info->full_hdr_len,
&ext_hdr, &info->rss_ex_dst);
} else if (curr_ext_hdr_type == IP6_DESTINATON) {
info->rss_ex_src_valid =
_eth_get_rss_ex_src_addr(pkt, pkt_frags,
ip6hdr_off + info->full_hdr_len,
&ext_hdr, &info->rss_ex_src);
} else if (curr_ext_hdr_type == IP6_FRAGMENT) {
info->fragment = true;
}
info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
curr_ext_hdr_type = ext_hdr.ip6r_nxt;
} while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
info->l4proto = ext_hdr.ip6r_nxt;
return true;
}