batman-adv: Limit queue lengths for batman and broadcast packets
Commit Message
This patch limits the queue lengths of batman and broadcast packets. BATMAN
packets are held back for aggregation and jittered to avoid interference.
Broadcast packets are stored to be sent out multiple times to increase
the probability to be received by other nodes in lossy environments.
Especially in extreme cases like broadcast storms, the queues have been seen
to run full, eating up all the memory and triggering the infamous OOM killer.
With the queue length limits introduced in this patch, this problem is
avoided.
Each queue is limited to 256 entries for now, resulting in 1 MB of maximum
space available in total for typical setups (assuming one packet including
overhead does not require more than 2000 bytes). This should also be reasonable
for smaller routers, otherwise the defines can be tweaked later.
Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
---
Comments
On Monday 05 April 2010 04:46:03 Simon Wunderlich wrote:
> Index: a/batman-adv-kernelland/send.c
> ===================================================================
> --- a/batman-adv-kernelland/send.c (revision 1616)
> +++ a/batman-adv-kernelland/send.c (working copy)
> @@ -382,12 +382,21 @@
> {
> struct forw_packet *forw_packet;
>
> + if (atomic_dec_and_test(&bcast_queue_left)) {
> + bat_dbg(DBG_BATMAN, "bcast packet queue full\n");
> + atomic_inc(&bcast_queue_left);
> + return;
> + }
> +
[....]
> --- a/batman-adv-kernelland/aggregation.c (revision 1616)
> +++ a/batman-adv-kernelland/aggregation.c (working copy)
> @@ -106,13 +106,27 @@
> struct forw_packet *forw_packet_aggr;
> unsigned long flags;
>
> + /* own packet should always be scheduled */
> + if (!own_packet) {
> + if (atomic_dec_and_test(&batman_queue_left)) {
> + bat_dbg(DBG_BATMAN, "batman packet queue full\n");
> + atomic_inc(&batman_queue_left);
> + return;
> + }
> + }
It should be possible to have multiple events accessing these functions at the
same time, or am I wrong?
Just say we have following situation (queue is full; full == 1):
* bcast comes in and we start add_bcast_packet_to_list and
dec_test(left) == 1 -> damn, no room left for us
-> for easier understanding: someone steals our cpu or the processing is
otherwise interleaved with following
* another bcast comes in and wants attention (left is now 0):
dec_test(left) == 0 (because left is now -1... or 0xfff....fff). Lets enqueue
it and do the rest
* first bcast continues and does atomic_inc(left) -> now it is 0
* now a storm of bcasts comes in and all do atomic_dec_and_test... each one
will be accepted because left is already zero and needs a looooong time to
be 0 again (or enough bcast packets were processed from the queue to get
positive again)
I am not 100% sure if this really can happen, but I thought that it was a hard
requirement for TCP port passed processor selection for parallel processing of
incoming packets on multicore/multiprocessor architectures.
Best regards,
Sven
Hey Sven,
you are right, this might be indeed a problem. I'll send an updated patch
which removes the problem by employing atomic_add_unless().
Marek also pointed out that global variables are not very pretty as we are
moving all the global stuff into bat_priv to allow multiple mesh soft
interfaces later. However I'd first send the patch with globals as is and
move them later in another patch.
best regards,
Simon
On Mon, Apr 05, 2010 at 01:21:36PM +0200, Sven Eckelmann wrote:
>
> It should be possible to have multiple events accessing these functions at the
> same time, or am I wrong?
>
> Just say we have following situation (queue is full; full == 1):
>
> * bcast comes in and we start add_bcast_packet_to_list and
> dec_test(left) == 1 -> damn, no room left for us
> -> for easier understanding: someone steals our cpu or the processing is
> otherwise interleaved with following
> * another bcast comes in and wants attention (left is now 0):
> dec_test(left) == 0 (because left is now -1... or 0xfff....fff). Lets enqueue
> it and do the rest
> * first bcast continues and does atomic_inc(left) -> now it is 0
> * now a storm of bcasts comes in and all do atomic_dec_and_test... each one
> will be accepted because left is already zero and needs a looooong time to
> be 0 again (or enough bcast packets were processed from the queue to get
> positive again)
>
> I am not 100% sure if this really can happen, but I thought that it was a hard
> requirement for TCP port passed processor selection for parallel processing of
> incoming packets on multicore/multiprocessor architectures.
>
> Best regards,
> Sven
===================================================================
@@ -382,12 +382,21 @@
{
struct forw_packet *forw_packet;
+ if (atomic_dec_and_test(&bcast_queue_left)) {
+ bat_dbg(DBG_BATMAN, "bcast packet queue full\n");
+ atomic_inc(&bcast_queue_left);
+ return;
+ }
+
forw_packet = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC);
- if (!forw_packet)
+ if (!forw_packet) {
+ atomic_inc(&bcast_queue_left);
return;
+ }
skb = skb_copy(skb, GFP_ATOMIC);
if (!skb) {
+ atomic_inc(&bcast_queue_left);
kfree(forw_packet);
return;
}
@@ -435,8 +444,10 @@
if ((forw_packet->num_packets < 3) &&
(atomic_read(&module_state) != MODULE_DEACTIVATING))
_add_bcast_packet_to_list(forw_packet, ((5 * HZ) / 1000));
- else
+ else {
forw_packet_free(forw_packet);
+ atomic_inc(&bcast_queue_left);
+ }
}
void send_outstanding_bat_packet(struct work_struct *work)
@@ -462,6 +473,10 @@
(atomic_read(&module_state) != MODULE_DEACTIVATING))
schedule_own_packet(forw_packet->if_incoming);
+ /* don't count own packet */
+ if (!forw_packet->own)
+ atomic_inc(&batman_queue_left);
+
forw_packet_free(forw_packet);
}
===================================================================
@@ -46,6 +46,9 @@
atomic_t originator_interval;
atomic_t vis_interval;
+atomic_t bcast_queue_left;
+atomic_t batman_queue_left;
+
int16_t num_hna;
int16_t num_ifs;
@@ -85,6 +88,8 @@
atomic_set(&originator_interval, 1000);
atomic_set(&vis_interval, 1000);/* TODO: raise this later, this is only
* for debugging now. */
+ atomic_set(&bcast_queue_left, BCAST_QUEUE_LEN);
+ atomic_set(&batman_queue_left, BATMAN_QUEUE_LEN);
/* the name should not be longer than 10 chars - see
* http://lwn.net/Articles/23634/ */
===================================================================
@@ -106,13 +106,27 @@
struct forw_packet *forw_packet_aggr;
unsigned long flags;
+ /* own packet should always be scheduled */
+ if (!own_packet) {
+ if (atomic_dec_and_test(&batman_queue_left)) {
+ bat_dbg(DBG_BATMAN, "batman packet queue full\n");
+ atomic_inc(&batman_queue_left);
+ return;
+ }
+ }
+
forw_packet_aggr = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC);
- if (!forw_packet_aggr)
+ if (!forw_packet_aggr) {
+ if (!own_packet)
+ atomic_inc(&batman_queue_left);
return;
+ }
forw_packet_aggr->packet_buff = kmalloc(MAX_AGGREGATION_BYTES,
GFP_ATOMIC);
if (!forw_packet_aggr->packet_buff) {
+ if (!own_packet)
+ atomic_inc(&batman_queue_left);
kfree(forw_packet_aggr);
return;
}
===================================================================
@@ -70,6 +70,8 @@
#define MODULE_ACTIVE 1
#define MODULE_DEACTIVATING 2
+#define BCAST_QUEUE_LEN 256
+#define BATMAN_QUEUE_LEN 256
/*
* Debug Messages
@@ -133,6 +135,8 @@
extern atomic_t originator_interval;
extern atomic_t vis_interval;
+extern atomic_t bcast_queue_left;
+extern atomic_t batman_queue_left;
extern int16_t num_hna;
extern int16_t num_ifs;