2008年7月15日 星期二

Improve performance of Bridging devices of different capabilities

Adding a wireless interface to a bridge drastically decreases throughput, even if there is no traffic on the wireless interface.

eth0: 90Mbps
br0=eth0 80Mbps
br0=eth0+ath0 53Mbps (only accessed from eth0)

Throughput was measured by accessing vsftpd running on the DUT (device under test).




br_features_recompute() computes the minimal common subset of the features supported by the enslaved devices:
linux-2.6.16/net/bridge/br_if.c
/*
* Recomputes the bridge device's feature flags from its ports' features.
*
* The result is the intersection of br->feature_mask with the features of
* every port on br->port_list. NETIF_F_IP_CSUM is tracked separately: it is
* kept only if it is in br->feature_mask and every port advertises at least
* one of NETIF_F_IP_CSUM, NETIF_F_NO_CSUM or NETIF_F_HW_CSUM.
* NETIF_F_LLTX is always set on the bridge device.
*/
void br_features_recompute(struct net_bridge *br)
{
struct net_bridge_port *p;
unsigned long features, checksum;

/* Split the mask: everything except IP_CSUM, and the IP_CSUM bit alone. */
features = br->feature_mask &~ NETIF_F_IP_CSUM;
checksum = br->feature_mask & NETIF_F_IP_CSUM;

list_for_each_entry(p, &br->port_list, list) {

/* A single port with no checksum capability at all disables IP_CSUM. */
if (!(p->dev->features
& (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
checksum = 0;
/* Keep only the features this port also has. */
features &= p->dev->features;
}

br->dev->features = features | checksum | NETIF_F_LLTX;
}
When bridging a device that supports NETIF_F_IP_CSUM (e1000) with one that does not (Atheros wireless), the bridge's feature set loses NETIF_F_IP_CSUM. Packets sent to the e1000 through the bridge (via tcp_sendmsg) may then be checksummed in software instead of hardware, which causes the low throughput.

linux-2.6.16/include/net/sock.h
/*
 * Attach a route to a socket and record the output device's capabilities.
 *
 * sk->sk_route_caps is copied from dst->dev->features. NETIF_F_TSO is then
 * removed if the socket has SOCK_NO_LARGESEND set or dst->header_len is
 * non-zero.
 */
static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
__sk_dst_set(sk, dst);
/* When dst->dev is a bridge, these are the bridge's computed features. */
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_TSO) {
if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
sk->sk_route_caps &= ~NETIF_F_TSO;
}
}


linux-2.6.16/net/ipv4/tcp.c
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size)
{
(...................................)
if (sk->sk_route_caps &
(NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
NETIF_F_HW_CSUM))
skb->ip_summed = CHECKSUM_HW;
skb_entail(sk, tp, skb);
copy = size_goal;
}

/* Try to append data to the end of skb. */
if (copy > seglen)
copy = seglen;

/* Where to copy to? */
if (skb_tailroom(skb) > 0) {
/* We have some space in skb head. Superb! */
if (copy > skb_tailroom(skb))
copy = skb_tailroom(skb);
if ((err = skb_add_data(skb, from, copy)) != 0)
goto do_fault;
(...................................)

linux-2.6.16/include/linux/skbuff.h
/*
 * Append @copy bytes from user buffer @from to the tail of @skb.
 *
 * If skb->ip_summed is CHECKSUM_NONE the data is checksummed while it is
 * copied and the partial sum is folded into skb->csum; otherwise a plain
 * copy_from_user() is done.
 *
 * Returns 0 on success. On a failed copy the skb is trimmed back to its
 * original length and -EFAULT is returned.
 */
static int skb_add_data(struct sk_buff *skb,
char __user *from, int copy)
{
/* Length before appending: used as the checksum offset and the rollback point. */
const int off = skb->len;

if (skb->ip_summed == CHECKSUM_NONE) {
/* Software checksum path: copy and checksum in one pass. */
int err = 0;
unsigned int csum = csum_and_copy_from_user(from,
skb_put(skb, copy),
copy, 0, &err);
if (!err) {
skb->csum = csum_block_add(skb->csum, csum, off);
return 0;
}
} else if (!copy_from_user(skb_put(skb, copy), from, copy))
return 0;

/* Either copy failed: undo the skb_put() above. */
__skb_trim(skb, off);
return -EFAULT;
}





But there is already code that handles the case where a packet has been set up to use a hardware feature that the transmitting device does not support.
linux-2.6.16/net/core/dev.c
int dev_queue_xmit(struct sk_buff *skb)
{
(........................................)
/* Fragmented skb is linearized if device does not support SG,
* or if at least one of fragments is in highmem and device
* does not support DMA from it.
*/
if (skb_shinfo(skb)->nr_frags &&
(!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
__skb_linearize(skb, GFP_ATOMIC))
goto out_kfree_skb;

/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/
if (skb->ip_summed == CHECKSUM_HW &&
(!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
(!(dev->features & NETIF_F_IP_CSUM) ||
skb->protocol != htons(ETH_P_IP))))
if (skb_checksum_help(skb, 0))
goto out_kfree_skb;


So in br_features_recompute, instead of taking the minimal common subset of features, the bridge device should advertise the union of its ports' features; this holds at least for NETIF_F_HW_CSUM and NETIF_F_SG. Upper layers can then take advantage of hardware offload if the outgoing device supports it, and dev_queue_xmit will fix up the packet if it doesn't.

It can be done likewise in 2.6.24.

linux-2.6.24/net/bridge/br_if.c
/*
 * Recomputes the bridge device's feature flags from its ports' features.
 *
 * Starts from br->feature_mask and folds in each port's features with
 * netdev_compute_features(); the result is stored in br->dev->features.
 */
void br_features_recompute(struct net_bridge *br)
{
struct net_bridge_port *p;
unsigned long features;

features = br->feature_mask;

/* Combine the running feature set with each port on the bridge in turn. */
list_for_each_entry(p, &br->port_list, list) {
features = netdev_compute_features(features, p->dev->features);
}

br->dev->features = features;
}



linux-2.6.24/net/core/dev.c
/**
* netdev_compute_features - compute conjunction of two feature sets
* @all: first feature set
* @one: second feature set
*
* Computes a new feature set after adding a device with feature set
* @one to the master device with current feature set @all. Returns
* the new feature set.
*
* NOTE(review): the result is computed as unsigned long but returned as
* int; confirm that all feature bits in use fit in an int.
*/
int netdev_compute_features(unsigned long all, unsigned long one)
{
/* if device needs checksumming, downgrade to hw checksumming */
/* XOR flips both bits; NO_CSUM is known to be set here, so it is cleared. */
if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;

/* if device can't do all checksum, downgrade to ipv4/ipv6 */
/* XOR flips all three bits; HW_CSUM is known to be set here, so it is cleared. */
if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
all ^= NETIF_F_HW_CSUM
| NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;

/* Only the first statement is guarded by the if; GSO is set on @one unconditionally. */
if (one & NETIF_F_GSO)
one |= NETIF_F_GSO_SOFTWARE;
one |= NETIF_F_GSO;

/* If even one device supports robust GSO, enable it for all. */
if (one & NETIF_F_GSO_ROBUST)
all |= NETIF_F_GSO_ROBUST;

/* Intersect with @one, but keep LLTX in @all whether or not @one has it. */
all &= one | NETIF_F_LLTX;

/* No checksum capability left: drop SG; without SG, drop all GSO bits. */
if (!(all & NETIF_F_ALL_CSUM))
all &= ~NETIF_F_SG;
if (!(all & NETIF_F_SG))
all &= ~NETIF_F_GSO_MASK;

return all;
}
EXPORT_SYMBOL(netdev_compute_features);

沒有留言: