[maint] batman-adv: Fix TT sync flags for intermediate TT responses

Message ID 20180505153020.29636-1-linus.luessing@c0d3.blue (mailing list archive)
State Superseded, archived
Delegated to: Antonio Quartulli
Headers
Series [maint] batman-adv: Fix TT sync flags for intermediate TT responses |

Commit Message

Linus Lüssing May 5, 2018, 3:30 p.m. UTC
  The previous TT sync fix so far only fixed TT responses issued by the
target node directly. So far, TT responses issued by intermediate nodes
still lead to the wrong flags being added, leading to CRC mismatches.

This behaviour was observed at Freifunk Hannover in a 800 nodes setup
where a considerable amount of nodes were still infected with 'WI'
TT flags even with (most) nodes having the previous TT sync fix applied.

I was able to reproduce the issue with intermediate TT responses in a
four node test setup and this patch fixes this issue by ensuring to
use the per originator instead of the summarized, OR'd ones.

Fixes: fa614fd04692 ("batman-adv: fix tt_global_entries flags update")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
---
 net/batman-adv/translation-table.c | 42 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)
  

Comments

Linus Lüssing May 5, 2018, 3:46 p.m. UTC | #1
On Sat, May 05, 2018 at 05:30:20PM +0200, Linus Lüssing wrote:
> I was able to reproduce the issue with intermediate TT responses in a
> four node test setup and this patch fixes this issue by ensuring to
> use the per originator instead of the summarized, OR'd ones.

Reproduction in VMs was a little bit tricky though. I used the
attached patch to:

A) Disable TT changes attached to OGMs
B) Enforce TT full table responses
C) Allow injecting a 'W' flag onto multicast TT entries on
   selected nodes

With this patch applied I used the following steps to reproduce
the issue:

1) Prepare a four nodes setup with a line topology:
   1 - 2 - 3 - 4
2) Enable IPv6
3) Start (patched) batman-adv
4) Node 4: Set multicast_mode to 2
5) Node 4: Set bat0 down & up
   -> check that 'W' flag is present in global TT for node 4
6) Node 3: Set bat0 down & up
7) Node 1: Check that 'W' flag is now present on
           multicast entries for both node 3 and 4

With the fix applied step 7 shows no more 'W' flags for node 3.

Regards, Linus
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index f2eef43b..cb79a1b8 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -687,7 +687,7 @@ static BATADV_ATTR(gw_sel_class, 0644, batadv_show_gw_sel_class,
 static BATADV_ATTR(gw_bandwidth, 0644, batadv_show_gw_bwidth,
 		   batadv_store_gw_bwidth);
 #ifdef CONFIG_BATMAN_ADV_MCAST
-BATADV_ATTR_SIF_BOOL(multicast_mode, 0644, NULL);
+BATADV_ATTR_SIF_UINT(multicast_mode, multicast_mode, 0644, 0, 2, NULL);
 #endif
 #ifdef CONFIG_BATMAN_ADV_DEBUG
 BATADV_ATTR_SIF_UINT(log_level, log_level, 0644, 0, BATADV_DBG_ALL, NULL);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index f141c14e..4218597e 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -820,6 +820,15 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
 
 	ret = true;
 out:
+	if (is_multicast_ether_addr(addr)) {
+		if (atomic_read(&bat_priv->multicast_mode) > 1) {
+			tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
+			printk("~~~ %s: Forcing wifi flag on multicast entry\n", __func__);
+		} else {
+			printk("~~~ %s: Not forcing wifi flag on multicast entry\n", __func__);
+		}
+	}
+
 	if (in_hardif)
 		batadv_hardif_put(in_hardif);
 	if (in_dev)
@@ -1170,6 +1179,15 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
 	if (!hdr)
 		return -ENOBUFS;
 
+	if (is_multicast_ether_addr(common->addr)) {
+		if (atomic_read(&bat_priv->multicast_mode) > 1) {
+			printk("~~~ %s: WIFI flag is %sset\n", __func__,
+				common->flags & BATADV_TT_CLIENT_WIFI ? "" : "not ");
+			printk("~~~ %s: flags: %u, WIFI: %u, NOPURGE: %u\n", __func__,
+				common->flags, BATADV_TT_CLIENT_WIFI, BATADV_TT_CLIENT_NOPURGE);
+		}
+	}
+
 	if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
 	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
 	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
@@ -3161,8 +3179,8 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
 		tt_vlan++;
 	}
 
-	if (full_table)
-		tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE;
+//	if (full_table)
+	tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE;
 
 	batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending TT_REQUEST to %pM [%c]\n",
 		   dst_orig_node->orig, full_table ? 'F' : '.');
@@ -3284,6 +3302,8 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
 		if (!tt_len)
 			goto out;
 
+		printk("~~~ %s: generating full table TT response for %pM, req by %pM\n", __func__, req_dst, req_src);
+
 		/* fill the rest of the tvlv with the real TT entries */
 		batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.global_hash,
 					tt_change, tt_len,
@@ -3413,6 +3433,8 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
 		if (!tt_len || !tvlv_len)
 			goto out;
 
+		printk("~~~ %s: generating full table TT response for me, req by %pM\n", __func__, req_src);
+
 		/* fill the rest of the tvlv with the real TT entries */
 		batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.local_hash,
 					tt_change, tt_len,
@@ -4230,7 +4252,8 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
 	num_entries = batadv_tt_entries(tvlv_value_len);
 
 	batadv_tt_update_orig(bat_priv, orig, tt_vlan, num_vlan, tt_change,
-			      num_entries, tt_data->ttvn);
+			      0, tt_data->ttvn);
+//			      num_entries, tt_data->ttvn);
 }
 
 /**
  
Sven Eckelmann May 5, 2018, 8:23 p.m. UTC | #2
On Samstag, 5. Mai 2018 17:30:20 CEST Linus Lüssing wrote:
[...]
> diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
> index 0225616d..a95724ea 100644
> --- a/net/batman-adv/translation-table.c
> +++ b/net/batman-adv/translation-table.c
> @@ -2914,6 +2914,41 @@ static bool batadv_tt_global_valid(const void *entry_ptr,
>  }
>  
>  /**
> + * batadv_tt_get_flags() - get TT flags for a given entry
> + * @tt_common_entry: the TT entry to get the flags for
> + * @orig: for a TT global entry the specific node to get the flags for
> + *
> + * Return: -ENOENT on error or the TT flags otherwise.
> + */
> +static int
> +batadv_tt_get_flags(struct batadv_tt_common_entry *tt_common_entry,
> +		    struct batadv_orig_node *orig)

Not sure whether it is a good idea to use the return here for the error code 
(< 0) and the flags (__u8/enum batadv_tt_client_flags). I will leave the 
decision here to Antonio.

[...]
>   * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from the
>   *  specified tt hash
>   * @bat_priv: the bat priv with all the soft interface information
> @@ -2934,6 +2969,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
>  	struct batadv_tvlv_tt_change *tt_change;
>  	struct hlist_head *head;
>  	u16 tt_tot, tt_num_entries = 0;
> +	int flags;
>  	u32 i;
>  
>  	tt_tot = batadv_tt_entries(tt_len);
> @@ -2951,8 +2987,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
>  			if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data)))
>  				continue;
>  
> +			flags = batadv_tt_get_flags(tt_common_entry, cb_data);
> +			if (flags < 0)
> +				continue;

The second argument of batadv_tt_get_flags is a little bit tricky here. The 
kernel-doc says that cb_data is "data passed to the filter function as 
argument" and is of type "void *". But you are now using it here as if would 
always be of type "struct batadv_orig_node *". This doesn't make problem  for 
now because only two functions are using this argument - luckily they are 
either set it to NULL or to "batadv_orig_node *".

I would propose to make it clear that this argument must be 
"struct batadv_orig_node *" and not an arbitrary argument which is only used 
by the valid_cb callback.

Kind regards,
	Sven
  
Marek Lindner May 8, 2018, 7:20 p.m. UTC | #3
On Sunday, May 6, 2018 4:23:37 AM HKT Sven Eckelmann wrote:
> > * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from
> > the *  specified tt hash
> > * @bat_priv: the bat priv with all the soft interface information
> > @@ -2934,6 +2969,7 @@ static void batadv_tt_tvlv_generate(struct
> > batadv_priv *bat_priv, struct batadv_tvlv_tt_change *tt_change;
> > struct hlist_head *head;
> > u16 tt_tot, tt_num_entries = 0;
> > +       int flags;
> > u32 i;
> > 
> > tt_tot = batadv_tt_entries(tt_len);
> > @@ -2951,8 +2987,12 @@ static void batadv_tt_tvlv_generate(struct
> > batadv_priv *bat_priv, if ((valid_cb) && (!valid_cb(tt_common_entry,
> > cb_data)))
> > continue;
> > 
> > +                       flags = batadv_tt_get_flags(tt_common_entry,
> > cb_data); +                       if (flags < 0)
> > +                               continue;
> 
> The second argument of batadv_tt_get_flags is a little bit tricky here. The
> kernel-doc says that cb_data is "data passed to the filter function as
> argument" and is of type "void *". But you are now using it here as if would
> always be of type "struct batadv_orig_node *". This doesn't make problem 
> for now because only two functions are using this argument - luckily they
> are either set it to NULL or to "batadv_orig_node *".
> 
> I would propose to make it clear that this argument must be
> "struct batadv_orig_node *" and not an arbitrary argument which is only used
> by the valid_cb callback.

I second that concern. An alternative suggestion: Modify 
batadv_tt_local_valid() / batadv_tt_global_valid() to return the flags as 
those functions already distinguish between the local and global case. 
Moreover, you save the extra batadv_tt_global_orig_entry_find() call which 
already happens in  batadv_tt_global_valid().

Cheers,
Marek
  
Antonio Quartulli May 8, 2018, 7:55 p.m. UTC | #4
On 09/05/18 03:20, Marek Lindner wrote:
> On Sunday, May 6, 2018 4:23:37 AM HKT Sven Eckelmann wrote:
>>> * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from
>>> the *  specified tt hash
>>> * @bat_priv: the bat priv with all the soft interface information
>>> @@ -2934,6 +2969,7 @@ static void batadv_tt_tvlv_generate(struct
>>> batadv_priv *bat_priv, struct batadv_tvlv_tt_change *tt_change;
>>> struct hlist_head *head;
>>> u16 tt_tot, tt_num_entries = 0;
>>> +       int flags;
>>> u32 i;
>>>
>>> tt_tot = batadv_tt_entries(tt_len);
>>> @@ -2951,8 +2987,12 @@ static void batadv_tt_tvlv_generate(struct
>>> batadv_priv *bat_priv, if ((valid_cb) && (!valid_cb(tt_common_entry,
>>> cb_data)))
>>> continue;
>>>
>>> +                       flags = batadv_tt_get_flags(tt_common_entry,
>>> cb_data); +                       if (flags < 0)
>>> +                               continue;
>>
>> The second argument of batadv_tt_get_flags is a little bit tricky here. The
>> kernel-doc says that cb_data is "data passed to the filter function as
>> argument" and is of type "void *". But you are now using it here as if would
>> always be of type "struct batadv_orig_node *". This doesn't make problem 
>> for now because only two functions are using this argument - luckily they
>> are either set it to NULL or to "batadv_orig_node *".
>>
>> I would propose to make it clear that this argument must be
>> "struct batadv_orig_node *" and not an arbitrary argument which is only used
>> by the valid_cb callback.
> 
> I second that concern. An alternative suggestion: Modify 
> batadv_tt_local_valid() / batadv_tt_global_valid() to return the flags as 
> those functions already distinguish between the local and global case. 
> Moreover, you save the extra batadv_tt_global_orig_entry_find() call which 
> already happens in  batadv_tt_global_valid().

I like Marek's idea, but I'd suggest to change the name of the
tt_*_valid() functions to something a bit more generic.


Apart from that I agree that that the flags retrieval could directly be
done within the callback which already knows if we are in the local or
global case.

Cheers,
  
Leonardo Mörlein May 9, 2018, 3:08 p.m. UTC | #5
I recently did a little experimentation on finding the "bad" (gluon)
nodes from a perspective of the vpn gateways with help of the tool
tshark. To document those procedure a little, I'll write it down in this
mail.

Starting point: we have a trace of the mesh-vpn interface from gateway
with the MAC 88:e6:40:20:50:01 which is named foo.pcap. (~10 sec)

As gluon nodes do not use the "isolation" flag of the translation
tables, the isolation flag is a good indicator to find affected packets.
Because the flag is only obviously only inside the TT response packets,
we need to filter for those. "(batadv.tvlv.tt.flags.type == 0x4) &&
(batadv.tvlv.tt.change.flags.isolate == 1)"

The next issue, when we use only the filters above is, that we see
forwarded responses twice, so we add a filter to only see the incomming
responses. To which node the response is forwarded is not really
important as those nodes are not the affected ones. So we add the filter
"(eth.dst == 88:e6:40:20:50:01)" to filter the packets directly
designated for us.

Now we can get the nexthop from which we are receiving the bad packets
by observing "eth.src". To sum it up, the following command is useful
for us:

tshark -r foo.pcap  -Y "(batadv.tvlv.tt.flags.type == 0x4) \
  && (batadv.tvlv.tt.change.flags.isolate == 1) \
  && (eth.dst == 88:e6:40:20:50:01)" \
  -e "eth.src" -Tfields | sort | uniq -c | sort -n

In my case, the output looks like this:

      2 2e:3a:10:c2:4e:5f
      2 88:e6:40:20:20:01
      5 88:e6:40:20:40:01
      9 ba:50:eb:56:50:97
     11 12:b7:57:22:e7:9f
     68 88:e6:40:20:70:01
    129 9e:b9:ba:ee:7e:9f
    194 96:c5:90:5a:1e:6f
    216 6e:34:7f:a4:90:a7
    338 88:e6:40:20:60:01

So we received 974 bad packets in the capture time. The MACs
88:e6:40:XX:XX:XX are the MACs of the other gateways, so we do not
consider them for now, as our aim is to find the bad freifunk routers
connected to this gateway.

Lets focus for now on the node 96:c5:90:5a:1e:6f. We now want to figure
out, which node is the originating node of the TT responses. So the
first idea would be to use the "batadv.unicast_tvlv.src" field of the
packages from 96:c5:90:5a:1e:6f. But this is not the originating node of
the response as intermediate nodes (which maybe answer the query) will
send the responses on behalf of the node for which they are answering.
So from "batadv.unicast_tvlv.src" we only receive the information, which
node was the target of the corresponding request and not which node
answered. But this information is valuable also as the bad node has to
be in the path between our nextnode (96:c5:90:5a:1e:6f) and the query
target. We may use this command:

tshark -r foo.pcap  -Y "(batadv.tvlv.tt.flags.type == 0x4) \
  && (batadv.tvlv.tt.change.flags.isolate == 1) \
  && (eth.dst == 88:e6:40:20:50:01) \
  && (eth.src == 96:c5:90:5a:1e:6f)" \
  -e "batadv.unicast_tvlv.src" -Tfields | sort | uniq -c | sort -n

In my dump the result of the command is:

      1 7a:25:0e:c4:c1:93
      2 02:a8:70:84:9d:fb
    191 4a:87:fc:b6:1d:fb

The interpretation of the result is, that there are 3 nodes which for
which we receive bad packets from the nextnode. So the bad node has to
somewhere between 96:c5:90:5a:1e:6f and one of the three nodes.

Kind regards,
Leonardo Mörlein
  

Patch

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0225616d..a95724ea 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -2914,6 +2914,41 @@  static bool batadv_tt_global_valid(const void *entry_ptr,
 }
 
 /**
+ * batadv_tt_get_flags() - get TT flags for a given entry
+ * @tt_common_entry: the TT entry to get the flags for
+ * @orig: for a TT global entry the specific node to get the flags for
+ *
+ * Return: -ENOENT on error or the TT flags otherwise.
+ */
+static int
+batadv_tt_get_flags(struct batadv_tt_common_entry *tt_common_entry,
+		    struct batadv_orig_node *orig)
+{
+	const struct batadv_tt_global_entry *tt_global_entry;
+	struct batadv_tt_orig_list_entry *orig_entry;
+	int flags;
+
+	/* a tt-local entry has no 'per orig' entries */
+	if (!orig)
+		return tt_common_entry->flags;
+
+	/* else this is a tt-global entry, get flags for node */
+	tt_global_entry = container_of(tt_common_entry,
+				       struct batadv_tt_global_entry,
+				       common);
+
+	orig_entry = batadv_tt_global_orig_entry_find(tt_global_entry, orig);
+	if (!orig_entry)
+		return -ENOENT;
+
+	flags = orig_entry->flags;
+
+	batadv_tt_orig_list_entry_put(orig_entry);
+
+	return flags;
+}
+
+/**
  * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from the
  *  specified tt hash
  * @bat_priv: the bat priv with all the soft interface information
@@ -2934,6 +2969,7 @@  static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
 	struct batadv_tvlv_tt_change *tt_change;
 	struct hlist_head *head;
 	u16 tt_tot, tt_num_entries = 0;
+	int flags;
 	u32 i;
 
 	tt_tot = batadv_tt_entries(tt_len);
@@ -2951,8 +2987,12 @@  static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
 			if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data)))
 				continue;
 
+			flags = batadv_tt_get_flags(tt_common_entry, cb_data);
+			if (flags < 0)
+				continue;
+
 			ether_addr_copy(tt_change->addr, tt_common_entry->addr);
-			tt_change->flags = tt_common_entry->flags;
+			tt_change->flags = flags;
 			tt_change->vid = htons(tt_common_entry->vid);
 			memset(tt_change->reserved, 0,
 			       sizeof(tt_change->reserved));