diff mbox

[1/9] batman-adv: Check hard_iface refcnt before calling function

Message ID 1457190564-11419-1-git-send-email-sven@narfation.org
State Accepted, archived
Commit bd2dcf52ce295b2e8d7448546539f0e96b8ecdcc
Delegated to: Marek Lindner
Headers show

Commit Message

Sven Eckelmann March 5, 2016, 3:09 p.m. UTC
The batadv_hardif_list list is checked in many situations and the items
in this list are given to specialized functions to modify the routing
behavior. At the moment each of these called functions has to check
itself whether the received batadv_hard_iface has a refcount > 0 before
it can increase the reference counter and use it in other objects.

This can easily lead to problems because it is not easily visible where
all callers of a function got the batadv_hard_iface object from and
whether they already hold a valid reference.

Checking the reference counter directly before calling a subfunction
with a pointer from the batadv_hardif_list avoids this problem.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/bat_iv_ogm.c | 11 +++++++++++
 net/batman-adv/bat_v_ogm.c  | 14 +++++++++++++-
 net/batman-adv/originator.c |  5 +++++
 net/batman-adv/send.c       |  6 ++++++
 4 files changed, 35 insertions(+), 1 deletion(-)

Comments

Andrew Lunn March 6, 2016, 12:08 a.m. UTC | #1
Hi Sven

Thanks for the patches. I will test them out of Monday and let you
know if it fixes the crash.

     Andrew

On Sat, Mar 05, 2016 at 04:09:16PM +0100, Sven Eckelmann wrote:
> The batadv_hardif_list list is checked in many situations and the items
> in this list are given to specialized functions to modify the routing
> behavior. At the moment each of these called functions has to check
> itself whether the received batadv_hard_iface has a refcount > 0 before
> it can increase the reference counter and use it in other objects.
> 
> This can easily lead to problems because it is not easily visible where
> all callers of a function got the batadv_hard_iface object from and
> whether they already hold a valid reference.
> 
> Checking the reference counter directly before calling a subfunction
> with a pointer from the batadv_hardif_list avoids this problem.
> 
> Signed-off-by: Sven Eckelmann <sven@narfation.org>
> ---
>  net/batman-adv/bat_iv_ogm.c | 11 +++++++++++
>  net/batman-adv/bat_v_ogm.c  | 14 +++++++++++++-
>  net/batman-adv/originator.c |  5 +++++
>  net/batman-adv/send.c       |  6 ++++++
>  4 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
> index 2c65668..b390ff9 100644
> --- a/net/batman-adv/bat_iv_ogm.c
> +++ b/net/batman-adv/bat_iv_ogm.c
> @@ -985,9 +985,15 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
>  	list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) {
>  		if (tmp_hard_iface->soft_iface != hard_iface->soft_iface)
>  			continue;
> +
> +		if (!kref_get_unless_zero(&tmp_hard_iface->refcount))
> +			continue;
> +
>  		batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
>  					*ogm_buff_len, hard_iface,
>  					tmp_hard_iface, 1, send_time);
> +
> +		batadv_hardif_put(tmp_hard_iface);
>  	}
>  	rcu_read_unlock();
>  
> @@ -1767,8 +1773,13 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
>  		if (hard_iface->soft_iface != bat_priv->soft_iface)
>  			continue;
>  
> +		if (!kref_get_unless_zero(&hard_iface->refcount))
> +			continue;
> +
>  		batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node,
>  						if_incoming, hard_iface);
> +
> +		batadv_hardif_put(hard_iface);
>  	}
>  	rcu_read_unlock();
>  
> diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
> index 4155fa5..473ebb9 100644
> --- a/net/batman-adv/bat_v_ogm.c
> +++ b/net/batman-adv/bat_v_ogm.c
> @@ -26,6 +26,7 @@
>  #include <linux/if_ether.h>
>  #include <linux/jiffies.h>
>  #include <linux/kernel.h>
> +#include <linux/kref.h>
>  #include <linux/list.h>
>  #include <linux/netdevice.h>
>  #include <linux/random.h>
> @@ -176,6 +177,9 @@ static void batadv_v_ogm_send(struct work_struct *work)
>  		if (hard_iface->soft_iface != bat_priv->soft_iface)
>  			continue;
>  
> +		if (!kref_get_unless_zero(&hard_iface->refcount))
> +			continue;
> +
>  		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
>  			   "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
>  			   ogm_packet->orig, ntohl(ogm_packet->seqno),
> @@ -185,10 +189,13 @@ static void batadv_v_ogm_send(struct work_struct *work)
>  
>  		/* this skb gets consumed by batadv_v_ogm_send_to_if() */
>  		skb_tmp = skb_clone(skb, GFP_ATOMIC);
> -		if (!skb_tmp)
> +		if (!skb_tmp) {
> +			batadv_hardif_put(hard_iface);
>  			break;
> +		}
>  
>  		batadv_v_ogm_send_to_if(skb_tmp, hard_iface);
> +		batadv_hardif_put(hard_iface);
>  	}
>  	rcu_read_unlock();
>  
> @@ -704,9 +711,14 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
>  		if (hard_iface->soft_iface != bat_priv->soft_iface)
>  			continue;
>  
> +		if (!kref_get_unless_zero(&hard_iface->refcount))
> +			continue;
> +
>  		batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
>  					       orig_node, neigh_node,
>  					       if_incoming, hard_iface);
> +
> +		batadv_hardif_put(hard_iface);
>  	}
>  	rcu_read_unlock();
>  out:
> diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
> index e63d6a5..44c508a 100644
> --- a/net/batman-adv/originator.c
> +++ b/net/batman-adv/originator.c
> @@ -1165,6 +1165,9 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
>  		if (hard_iface->soft_iface != bat_priv->soft_iface)
>  			continue;
>  
> +		if (!kref_get_unless_zero(&hard_iface->refcount))
> +			continue;
> +
>  		best_neigh_node = batadv_find_best_neighbor(bat_priv,
>  							    orig_node,
>  							    hard_iface);
> @@ -1172,6 +1175,8 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
>  				    best_neigh_node);
>  		if (best_neigh_node)
>  			batadv_neigh_node_put(best_neigh_node);
> +
> +		batadv_hardif_put(hard_iface);
>  	}
>  	rcu_read_unlock();
>  
> diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
> index 3ce06e0..43a950e 100644
> --- a/net/batman-adv/send.c
> +++ b/net/batman-adv/send.c
> @@ -26,6 +26,7 @@
>  #include <linux/if.h>
>  #include <linux/jiffies.h>
>  #include <linux/kernel.h>
> +#include <linux/kref.h>
>  #include <linux/list.h>
>  #include <linux/netdevice.h>
>  #include <linux/printk.h>
> @@ -577,10 +578,15 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
>  		if (forw_packet->num_packets >= hard_iface->num_bcasts)
>  			continue;
>  
> +		if (!kref_get_unless_zero(&hard_iface->refcount))
> +			continue;
> +
>  		/* send a copy of the saved skb */
>  		skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
>  		if (skb1)
>  			batadv_send_broadcast_skb(skb1, hard_iface);
> +
> +		batadv_hardif_put(hard_iface);
>  	}
>  	rcu_read_unlock();
>  
> -- 
> 2.7.0
>
Sven Eckelmann March 6, 2016, 8:55 a.m. UTC | #2
On Sunday 06 March 2016 01:08:23 Andrew Lunn wrote:
> Hi Sven
> 
> Thanks for the patches. I will test them out of Monday and let you
> know if it fixes the crash.

The ones I've Cc'ed you are not really about fixing the crash. They are just 
to clean up the reference counting (which you've complained about). You can 
test them but I would doubt that they fix the problem. But they might help you 
to detect situation when the refcounter gets 0 but shouldn't.

There are also some related problems which I've already mentioned on the 
mailing list 9 months ago and never got an answer. I've moved them now to the 
issue tracker in hope that the authors of the feature will now reply. The 
unchecked double *_put with [2,3] together with the unchecked double list_adds 
[1] are a good way to mess up the reference counting of several objects 
(including the hard interfaces).

You may also want to add a real reference counting fix which I've also posted 
yesterday (but for non-hard-interface objects) [4].

And a small remark about your x86 without serial port. Two systems connected 
via usb-serial+null-model-cable+usb-serial work quite well for this type of 
tests. Add  "console=tty0 console=ttyUSB0,115200,n8" to the bootargs of your 
test system and boot it up. The system used for gathering the kernel output 
just has to run "screen /dev/ttyUSB0 115200" and dump all the data.

Just a suggestion in case you didn't think about using this kind of setup.

Thanks for debugging this problem and have a nice weekend.

Kind regards,
	Sven

[1] https://www.open-mesh.org/issues/243
[2] https://www.open-mesh.org/issues/242
[3] https://www.open-mesh.org/issues/235
[4] https://patchwork.open-mesh.org/patch/15888/
Andrew Lunn March 6, 2016, 2:10 p.m. UTC | #3
> And a small remark about your x86 without serial port. Two systems connected 
> via usb-serial+null-model-cable+usb-serial work quite well for this type of 
> tests. Add  "console=tty0 console=ttyUSB0,115200,n8" to the bootargs of your 
> test system and boot it up. The system used for gathering the kernel output 
> just has to run "screen /dev/ttyUSB0 115200" and dump all the data.

Hi Sven

The box does have a couple of FTDI chips. But i've had mixed results
with such a setup. Often the Opps does not get out of the USB
subsystem before the box is dead. I can give it a try and see what
happens in this case.

	Andrew
Marek Lindner March 28, 2016, 2:19 p.m. UTC | #4
On Saturday, March 05, 2016 16:09:16 Sven Eckelmann wrote:
> The batadv_hardif_list list is checked in many situations and the items
> in this list are given to specialized functions to modify the routing
> behavior. At the moment each of these called functions has to check
> itself whether the received batadv_hard_iface has a refcount > 0 before
> it can increase the reference counter and use it in other objects.
> 
> This can easily lead to problems because it is not easily visible where
> all callers of a function got the batadv_hard_iface object from and
> whether they already hold a valid reference.
> 
> Checking the reference counter directly before calling a subfunction
> with a pointer from the batadv_hardif_list avoids this problem.
> 
> Signed-off-by: Sven Eckelmann <sven@narfation.org>
> ---
>  net/batman-adv/bat_iv_ogm.c | 11 +++++++++++
>  net/batman-adv/bat_v_ogm.c  | 14 +++++++++++++-
>  net/batman-adv/originator.c |  5 +++++
>  net/batman-adv/send.c       |  6 ++++++
>  4 files changed, 35 insertions(+), 1 deletion(-)

Applied in revision bd2dcf5.

Thanks,
Marek
diff mbox

Patch

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 2c65668..b390ff9 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -985,9 +985,15 @@  static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
 	list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) {
 		if (tmp_hard_iface->soft_iface != hard_iface->soft_iface)
 			continue;
+
+		if (!kref_get_unless_zero(&tmp_hard_iface->refcount))
+			continue;
+
 		batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
 					*ogm_buff_len, hard_iface,
 					tmp_hard_iface, 1, send_time);
+
+		batadv_hardif_put(tmp_hard_iface);
 	}
 	rcu_read_unlock();
 
@@ -1767,8 +1773,13 @@  static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
 		if (hard_iface->soft_iface != bat_priv->soft_iface)
 			continue;
 
+		if (!kref_get_unless_zero(&hard_iface->refcount))
+			continue;
+
 		batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node,
 						if_incoming, hard_iface);
+
+		batadv_hardif_put(hard_iface);
 	}
 	rcu_read_unlock();
 
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 4155fa5..473ebb9 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -26,6 +26,7 @@ 
 #include <linux/if_ether.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/random.h>
@@ -176,6 +177,9 @@  static void batadv_v_ogm_send(struct work_struct *work)
 		if (hard_iface->soft_iface != bat_priv->soft_iface)
 			continue;
 
+		if (!kref_get_unless_zero(&hard_iface->refcount))
+			continue;
+
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
 			   ogm_packet->orig, ntohl(ogm_packet->seqno),
@@ -185,10 +189,13 @@  static void batadv_v_ogm_send(struct work_struct *work)
 
 		/* this skb gets consumed by batadv_v_ogm_send_to_if() */
 		skb_tmp = skb_clone(skb, GFP_ATOMIC);
-		if (!skb_tmp)
+		if (!skb_tmp) {
+			batadv_hardif_put(hard_iface);
 			break;
+		}
 
 		batadv_v_ogm_send_to_if(skb_tmp, hard_iface);
+		batadv_hardif_put(hard_iface);
 	}
 	rcu_read_unlock();
 
@@ -704,9 +711,14 @@  static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
 		if (hard_iface->soft_iface != bat_priv->soft_iface)
 			continue;
 
+		if (!kref_get_unless_zero(&hard_iface->refcount))
+			continue;
+
 		batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
 					       orig_node, neigh_node,
 					       if_incoming, hard_iface);
+
+		batadv_hardif_put(hard_iface);
 	}
 	rcu_read_unlock();
 out:
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index e63d6a5..44c508a 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1165,6 +1165,9 @@  static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
 		if (hard_iface->soft_iface != bat_priv->soft_iface)
 			continue;
 
+		if (!kref_get_unless_zero(&hard_iface->refcount))
+			continue;
+
 		best_neigh_node = batadv_find_best_neighbor(bat_priv,
 							    orig_node,
 							    hard_iface);
@@ -1172,6 +1175,8 @@  static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
 				    best_neigh_node);
 		if (best_neigh_node)
 			batadv_neigh_node_put(best_neigh_node);
+
+		batadv_hardif_put(hard_iface);
 	}
 	rcu_read_unlock();
 
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3ce06e0..43a950e 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -26,6 +26,7 @@ 
 #include <linux/if.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
@@ -577,10 +578,15 @@  static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 		if (forw_packet->num_packets >= hard_iface->num_bcasts)
 			continue;
 
+		if (!kref_get_unless_zero(&hard_iface->refcount))
+			continue;
+
 		/* send a copy of the saved skb */
 		skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
 		if (skb1)
 			batadv_send_broadcast_skb(skb1, hard_iface);
+
+		batadv_hardif_put(hard_iface);
 	}
 	rcu_read_unlock();