[RFC] batman-adv: Reserve needed_headroom for fragments

Message ID 20201125122438.955972-1-sven@narfation.org (mailing list archive)
State RFC, archived
Delegated to: Simon Wunderlich
Headers
Series [RFC] batman-adv: Reserve needed_headroom for fragments |

Commit Message

Sven Eckelmann Nov. 25, 2020, 12:24 p.m. UTC
  TODO: write something about the extra headroom vxlan needs and why it
reduced the performance significantly when only using the minimum reserved
space.

Cc: Annika Wickert <annika.wickert@exaring.de>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/fragmentation.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)
  

Comments

Annika Wickert Nov. 25, 2020, 11:14 p.m. UTC | #1
Hi,

I tried the patch in production on our gateway as our problem is rather significant. I don't see an improvement in the pskb_expand_head() behaviour. (Flamegraph https://cloud.awlnx.space/owncloud/s/3wtoakc4mzFQSGs)

I tried to debug even further and added some debugging points in br_if.c and vxlan.c

This is what I get from the bridge when bat0 is enslaved with the vxlan interface as member of batman ( https://elixir.bootlin.com/linux/latest/source/net/bridge/br_if.c#L311 )
[   36.959547] Bridge firewalling registered
[  522.221767] SKB Bridge br_if.c: max_headroom 0
[  522.221781] SKB Bridge br_if.c: new_hr 0
[  627.186129] SKB Bridge br_if.c: max_headroom 0
[  627.186139] SKB Bridge br_if.c: new_hr 0
[  627.616650] SKB Bridge br_if.c: new_hr 102

Also BATMAN reports itself when initialized and seems not to propagate stuff up the stack on change?: (https://github.com/open-mesh-mirror/batman-adv/blob/master/net/batman-adv/hard-interface.c#L555  )
[ 3350.212094] SKB hard-interface.h: soft_iface->needed_tailroom) 0
[3350.212105] SKB hard-interface.h: soft_iface->needed_headroom) 358
[ 3350.212116] SKB hard-interface.h: lower_headroom 70
[ 3350.212126] SKB hard-interface.h: needed_headroom 102

Also added some debugging to Fragmentation.c in BATMAN after the patch: 
Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: ll_reserved 96
Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: skb->len 762
Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: header_size 20
Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: fragment_size 762
Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: ll_reserved 96

At the same time the VXLAN interface which is transported over Wireguard reports this (https://elixir.bootlin.com/linux/latest/source/drivers/net/vxlan.c#L2352 )
[  567.515778] SKB VXLAN vxlan.c: min_headroom 200
[  567.515792] SKB VXLAN vxlan.c: dst->header_len 0
[  567.515805] SKB VXLAN vxlan.c: VXLAN_HLEN 16
[  567.515819] SKB VXLAN vxlan.c: LL_RESERVED_SPACE(dst->dev) 144
[  567.515831] SKB VXLAN vxlan.c: iphdr_len 40

So in my opinion the needed headroom reported by batman is wrong by maybe 200 ? As the min_headroom of vxlan seems to be 200 but BATMAN reports 102 up the stack to the bridge.

If you need any more input we are happy to help. Because the actual performance with running batman over vxlan is really bad. 
We have some figures here: https://gist.github.com/fadenb/9705059cf17eddf60e744e95bf926f05

Thank you for your help!
Annika

--
Annika Wickert 
 
EXARING AG

 
 

On 25.11.20, 13:25, "Sven Eckelmann" <sven@narfation.org> wrote:

    TODO: write something about the extra headroom vxlan needs and why it
    reduced the performance significantly when only using the minimum reserved
    space.

    Cc: Annika Wickert <annika.wickert@exaring.de>
    Signed-off-by: Sven Eckelmann <sven@narfation.org>
    ---
     net/batman-adv/fragmentation.c | 14 +++++++++-----
     1 file changed, 9 insertions(+), 5 deletions(-)

    diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
    index 97220e19..5039b201 100644
    --- a/net/batman-adv/fragmentation.c
    +++ b/net/batman-adv/fragmentation.c
    @@ -391,6 +391,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,

     /**
      * batadv_frag_create() - create a fragment from skb
    + * @net_dev: outgoing device for fragment
      * @skb: skb to create fragment from
      * @frag_head: header to use in new fragment
      * @fragment_size: size of new fragment
    @@ -401,22 +402,24 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
      *
      * Return: the new fragment, NULL on error.
      */
    -static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
    +static struct sk_buff *batadv_frag_create(struct net_device *net_dev,
    +					  struct sk_buff *skb,
     					  struct batadv_frag_packet *frag_head,
     					  unsigned int fragment_size)
     {
     	struct sk_buff *skb_fragment;
     	unsigned int header_size = sizeof(*frag_head);
     	unsigned int mtu = fragment_size + header_size;
    +	int ll_reserved = LL_RESERVED_SPACE(net_dev);

    -	skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
    +	skb_fragment = dev_alloc_skb(ll_reserved + mtu);
     	if (!skb_fragment)
     		goto err;

     	skb_fragment->priority = skb->priority;

     	/* Eat the last mtu-bytes of the skb */
    -	skb_reserve(skb_fragment, header_size + ETH_HLEN);
    +	skb_reserve(skb_fragment, ll_reserved + header_size);
     	skb_split(skb, skb_fragment, skb->len - fragment_size);

     	/* Add the header */
    @@ -439,11 +442,12 @@ int batadv_frag_send_packet(struct sk_buff *skb,
     			    struct batadv_orig_node *orig_node,
     			    struct batadv_neigh_node *neigh_node)
     {
    +	struct net_device *net_dev = neigh_node->if_incoming->net_dev;
     	struct batadv_priv *bat_priv;
     	struct batadv_hard_iface *primary_if = NULL;
     	struct batadv_frag_packet frag_header;
     	struct sk_buff *skb_fragment;
    -	unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
    +	unsigned int mtu = net_dev->mtu;
     	unsigned int header_size = sizeof(frag_header);
     	unsigned int max_fragment_size, num_fragments;
     	int ret;
    @@ -503,7 +507,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
     			goto put_primary_if;
     		}

    -		skb_fragment = batadv_frag_create(skb, &frag_header,
    +		skb_fragment = batadv_frag_create(net_dev, skb, &frag_header,
     						  max_fragment_size);
     		if (!skb_fragment) {
     			ret = -ENOMEM;
    -- 
    2.29.2
  
Sven Eckelmann Nov. 26, 2020, 8:21 a.m. UTC | #2
Hi,

I find your output slightly confusing. Maybe you can change your printk stuff 
to something more like:

  printk("%s %s:%u max_headroom %u\n", __FILE__, __func__, __LINE__, max_headroom);

On Thursday, 26 November 2020 00:14:35 CET Annika Wickert wrote:
> This is what I get from the bridge when bat0 is enslaved with the vxlan interface as member of batman ( https://elixir.bootlin.com/linux/latest/source/net/bridge/br_if.c#L311 )
> [   36.959547] Bridge firewalling registered
> [  522.221767] SKB Bridge br_if.c: max_headroom 0
> [  522.221781] SKB Bridge br_if.c: new_hr 0
> [  627.186129] SKB Bridge br_if.c: max_headroom 0
> [  627.186139] SKB Bridge br_if.c: new_hr 0
> [  627.616650] SKB Bridge br_if.c: new_hr 102

When is this shown? Does the batadv interface already have its hardif (slave) 
interfaces attached at that point? And did the vxlan report the correct 
needed_headroom to batadv before you've tried to attach the batadv interface 
to the bridge?

Because the bridge can also only change its needed_headroom on interface add 
or delete.

> Also BATMAN reports itself when initialized and seems not to propagate stuff up the stack on change?: (https://github.com/open-mesh-mirror/batman-adv/blob/master/net/batman-adv/hard-interface.c#L555  )
> [ 3350.212094] SKB hard-interface.h: soft_iface->needed_tailroom) 0
> [3350.212105] SKB hard-interface.h: soft_iface->needed_headroom) 358
> [ 3350.212116] SKB hard-interface.h: lower_headroom 70
> [ 3350.212126] SKB hard-interface.h: needed_headroom 102

Afaik, it is "propagating" its stuff by adjusting its own needed_headroom/
tailroom at this point. But there is no way to notify that the headroom/
tailroom was changed and the upper layers should recalculate it.

If you need something like this then we might to have a new 
NETDEV_RESERVED_SPACE_CHANGE (or a better name OR maybe use a netdev_cmd with 
a similar meaning). And then call this whenever the needed_headroom/
tailroom/... of an interface changes during its lifetime. And bridge/batman-
adv/ovs/... have to check the headroom in their notifier_call again when they 
receive this event.

> Also added some debugging to Fragmentation.c in BATMAN after the patch: 
> Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: ll_reserved 96
> Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: skb->len 762
> Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: header_size 20
> Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: fragment_size 762
> Nov 25 17:48:26 raspi-1gb.awlnx.space kernel: SKB Fragmentation.c: ll_reserved 96
> 
> At the same time the VXLAN interface which is transported over Wireguard reports this (https://elixir.bootlin.com/linux/latest/source/drivers/net/vxlan.c#L2352 )
> [  567.515778] SKB VXLAN vxlan.c: min_headroom 200
> [  567.515792] SKB VXLAN vxlan.c: dst->header_len 0
> [  567.515805] SKB VXLAN vxlan.c: VXLAN_HLEN 16
> [  567.515819] SKB VXLAN vxlan.c: LL_RESERVED_SPACE(dst->dev) 144
> [  567.515831] SKB VXLAN vxlan.c: iphdr_len 40
> 
> So in my opinion the needed headroom reported by batman is wrong by maybe 200 ? As the min_headroom of vxlan seems to be 200 but BATMAN reports 102 up the stack to the bridge.

Could it be that the vxlan didn't had the correct needed_headroom when you've 
added it to you batadv interface? Or that the vxlan interface didn't set the 
correct needed_headroom for its lower_dev (see vxlan_config_apply)?



If you have the "slow" setup, can you please do following steps:

* keep vxlan as is (I hope you specify a fixed lowerdev)

  - but try to print the needed headroom in vxlan_config_apply and compare it 
    to the ones from vxlan_build_skb

* remove the vxlan from your batadv interface
* add your vxlan again from the batadv interface

  - check if the headroom numbers are now looking better in 
    batadv_hardif_recalc_extra_skbroom

* remove batadv interface from the bridge
* add your batadv interface again to the bridge

  - is update_headroom() now using the correct headroom information?

> If you need any more input we are happy to help. Because the actual performance with running batman over vxlan is really bad. 
> We have some figures here: https://gist.github.com/fadenb/9705059cf17eddf60e744e95bf926f05

Kind regards,
	Sven
  
Annika Wickert Nov. 26, 2020, 8:58 a.m. UTC | #3
Hi,

    Hi,

   >I find your output slightly confusing. Maybe you can change your printk stuff 
   > to something more like:

   >printk("%s %s:%u max_headroom %u\n", __FILE__, __func__, __LINE__, max_headroom);

Will add this thx.

    >On Thursday, 26 November 2020 00:14:35 CET Annika Wickert wrote:
    >> This is what I get from the bridge when bat0 is enslaved with the vxlan interface as member of batman ( https://elixir.bootlin.com/linux/latest/source/net/bridge/br_if.c#L311 )
    >> [   36.959547] Bridge firewalling registered
    >> [  522.221767] SKB Bridge br_if.c: max_headroom 0
    >> [  522.221781] SKB Bridge br_if.c: new_hr 0
    >> [  627.186129] SKB Bridge br_if.c: max_headroom 0
    >> [  627.186139] SKB Bridge br_if.c: new_hr 0
    >> [  627.616650] SKB Bridge br_if.c: new_hr 102

    >When is this shown? Does the batadv interface already have its hardif (slave) 
    >interfaces attached at that point? And did the vxlan report the correct 
    >needed_headroom to batadv before you've tried to attach the batadv interface 
    >to the bridge?

BATMAN already has the vxlan interface as hardif here is the script I use to generate the config:

ip link add mesh-vpn type vxlan id 4831583 local fe80::2e0:2fff:fe18:dc2f remote fe80::281:8eff:fef0:73aa  dstport 8472 dev wg-uplink
ip link del bat-welt 
rmmod batman-adv
modprobe batman-adv
 batctl ra BATMAN_V
 batctl meshif bat-welt interface add mesh-vpn
 ip link set up bat-welt
 ip link set dev bat-welt master br-welt

   > Because the bridge can also only change its needed_headroom on interface add 
   >or delete.

    >> Also BATMAN reports itself when initialized and seems not to propagate stuff up the stack on change?: (https://github.com/open-mesh-mirror/batman-adv/blob/master/net/batman-adv/hard-interface.c#L555  )
    >> [ 3350.212116] SKB hard-interface.h: lower_headroom 70
    >> [ 3350.212126] SKB hard-interface.h: needed_headroom 102

    >Afaik, it is "propagating" its stuff by adjusting its own needed_headroom/
    >tailroom at this point. But there is no way to notify that the headroom/
    >tailroom was changed and the upper layers should recalculate it.

Which should already include the headroom needed by vxlan as it's already present as hardif. 

    >If you need something like this then we might to have a new 
    >NETDEV_RESERVED_SPACE_CHANGE (or a better name OR maybe use a netdev_cmd with 
    >a similar meaning). And then call this whenever the needed_headroom/
    >tailroom/... of an interface changes during its lifetime. And bridge/batman-
    >adv/ovs/... have to check the headroom in their notifier_call again when they 
    >receive this event.

    >Could it be that the vxlan didn't had the correct needed_headroom when you've 
    >added it to you batadv interface? Or that the vxlan interface didn't set the 
    >correct needed_headroom for its lower_dev (see vxlan_config_apply)?

  The vxlan interface was added first. So it should propagate it?

    >If you have the "slow" setup, can you please do following steps:

    >* keep vxlan as is (I hope you specify a fixed lowerdev)

    >- but try to print the needed headroom in vxlan_config_apply and compare it 
    >  to the ones from vxlan_build_skb

    >* remove the vxlan from your batadv interface
    >* add your vxlan again from the batadv interface

    > - check if the headroom numbers are now looking better in 
    >    batadv_hardif_recalc_extra_skbroom

    > * remove batadv interface from the bridge
    > * add your batadv interface again to the bridge

     > - is update_headroom() now using the correct headroom information?


As the setup is always doing the pskb_expand_head() it should be possible to test this.

Best Annika
  
mephisto@mephis.to Nov. 26, 2020, 10:01 a.m. UTC | #4
We currently observe that issue on our experimental VX-over-Wireguard Gateway, too. 

However, we had observed a similar performance hit about 2y ago  when introducing vxlan as layer below batman for wired mesh which might be related. We ignored it since debugging sessions were not conclusive for me as I'm no Kernelhacker and because it only applied to specific scenarios and vlan still outperformed the VPN connection.. still one similarity is that our debugging sessions also pointed to batman fragmentation that occured in conjunction with VXLan. Running pure batman over wire ran magnitudes faster.. Might be the same thing.

https://github.com/freifunk-gluon/gluon/issues/1315

In our current experimental setup when running vxlan over wireguard that performance hit hurts everyone..
  

Patch

diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 97220e19..5039b201 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -391,6 +391,7 @@  bool batadv_frag_skb_fwd(struct sk_buff *skb,
 
 /**
  * batadv_frag_create() - create a fragment from skb
+ * @net_dev: outgoing device for fragment
  * @skb: skb to create fragment from
  * @frag_head: header to use in new fragment
  * @fragment_size: size of new fragment
@@ -401,22 +402,24 @@  bool batadv_frag_skb_fwd(struct sk_buff *skb,
  *
  * Return: the new fragment, NULL on error.
  */
-static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
+static struct sk_buff *batadv_frag_create(struct net_device *net_dev,
+					  struct sk_buff *skb,
 					  struct batadv_frag_packet *frag_head,
 					  unsigned int fragment_size)
 {
 	struct sk_buff *skb_fragment;
 	unsigned int header_size = sizeof(*frag_head);
 	unsigned int mtu = fragment_size + header_size;
+	int ll_reserved = LL_RESERVED_SPACE(net_dev);
 
-	skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
+	skb_fragment = dev_alloc_skb(ll_reserved + mtu);
 	if (!skb_fragment)
 		goto err;
 
 	skb_fragment->priority = skb->priority;
 
 	/* Eat the last mtu-bytes of the skb */
-	skb_reserve(skb_fragment, header_size + ETH_HLEN);
+	skb_reserve(skb_fragment, ll_reserved + header_size);
 	skb_split(skb, skb_fragment, skb->len - fragment_size);
 
 	/* Add the header */
@@ -439,11 +442,12 @@  int batadv_frag_send_packet(struct sk_buff *skb,
 			    struct batadv_orig_node *orig_node,
 			    struct batadv_neigh_node *neigh_node)
 {
+	struct net_device *net_dev = neigh_node->if_incoming->net_dev;
 	struct batadv_priv *bat_priv;
 	struct batadv_hard_iface *primary_if = NULL;
 	struct batadv_frag_packet frag_header;
 	struct sk_buff *skb_fragment;
-	unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
+	unsigned int mtu = net_dev->mtu;
 	unsigned int header_size = sizeof(frag_header);
 	unsigned int max_fragment_size, num_fragments;
 	int ret;
@@ -503,7 +507,7 @@  int batadv_frag_send_packet(struct sk_buff *skb,
 			goto put_primary_if;
 		}
 
-		skb_fragment = batadv_frag_create(skb, &frag_header,
+		skb_fragment = batadv_frag_create(net_dev, skb, &frag_header,
 						  max_fragment_size);
 		if (!skb_fragment) {
 			ret = -ENOMEM;