summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-27 14:05:19 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-27 14:05:19 -0700
commit2decb2682f80759f631c8332f9a2a34a02150a03 (patch)
tree6c30015e4118ff6a56c67043f2ba842ed4a6e011 /net/ipv4
parentb787f68c36d49bb1d9236f403813641efa74a031 (diff)
parent22a8f237c0551bae95ffcd2a7ff17d6f5fcce7e7 (diff)
downloadkernel-2decb2682f80759f631c8332f9a2a34a02150a03.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) mlx4 doesn't check fully for supported valid RSS hash function, fix from Amir Vadai 2) Off by one in ibmveth_change_mtu(), from David Gibson 3) Prevent altera chip from reporting false error interrupts in some circumstances, from Chee Nouk Phoon 4) Get rid of that stupid endless loop trying to allocate a FIN packet in TCP, and in the process kill deadlocks. From Eric Dumazet 5) Fix get_rps_cpus() crash due to wrong invalid-cpu value, also from Eric Dumazet 6) Fix two bugs in async rhashtable resizing, from Thomas Graf 7) Fix topology server listener socket namespace bug in TIPC, from Ying Xue 8) Add some missing HAS_DMA kconfig dependencies, from Geert Uytterhoeven 9) bgmac driver intends to force re-polling but does so by returning the wrong value from it's ->poll() handler. Fix from Rafał Miłecki 10) When the creater of an rhashtable configures a max size for it, don't bark in the logs and drop insertions when that is exceeded. Fix from Johannes Berg 11) Recover from out of order packets in ppp mppe properly, from Sylvain Rochet * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (41 commits) bnx2x: really disable TPA if 'disable_tpa' option is set net:treewide: Fix typo in drivers/net net/mlx4_en: Prevent setting invalid RSS hash function mdio-mux-gpio: use new gpiod_get_array and gpiod_put_array functions netfilter; Add some missing default cases to switch statements in nft_reject. ppp: mppe: discard late packet in stateless mode ppp: mppe: sanity error path rework net/bonding: Make DRV macros private net: rfs: fix crash in get_rps_cpus() altera tse: add support for fixed-links. pxa168: fix double deallocation of managed resources net: fix crash in build_skb() net: eth: altera: Resolve false errors from MSGDMA to TSE ehea: Fix memory hook reference counting crashes net/tg3: Release IRQs on permanent error net: mdio-gpio: support access that may sleep inet: fix possible panic in reqsk_queue_unlink() rhashtable: don't attempt to grow when at max_size bgmac: fix requests for extra polling calls from NAPI tcp: avoid looping in tcp_send_fin() ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/inet_connection_sock.c34
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_output.c64
4 files changed, 85 insertions, 23 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5c3dd6267ed3..8976ca423a07 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
}
EXPORT_SYMBOL(inet_rtx_syn_ack);
+/* return true if req was found in the syn_table[] */
+static bool reqsk_queue_unlink(struct request_sock_queue *queue,
+ struct request_sock *req)
+{
+ struct listen_sock *lopt = queue->listen_opt;
+ struct request_sock **prev;
+ bool found = false;
+
+ spin_lock(&queue->syn_wait_lock);
+
+ for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
+ prev = &(*prev)->dl_next) {
+ if (*prev == req) {
+ *prev = req->dl_next;
+ found = true;
+ break;
+ }
+ }
+
+ spin_unlock(&queue->syn_wait_lock);
+ if (del_timer(&req->rsk_timer))
+ reqsk_put(req);
+ return found;
+}
+
+void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+{
+ if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
+ reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+ reqsk_put(req);
+ }
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
+
static void reqsk_timer_handler(unsigned long data)
{
struct request_sock *req = (struct request_sock *)data;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3571f2be4470..fc1c658ec6c1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
if (req) {
nsk = tcp_check_req(sk, skb, req, false);
- reqsk_put(req);
+ if (!nsk)
+ reqsk_put(req);
return nsk;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 63d6311b5365..e5d7649136fc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -755,10 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!child)
goto listen_overflow;
- inet_csk_reqsk_queue_unlink(sk, req);
- inet_csk_reqsk_queue_removed(sk, req);
-
+ inet_csk_reqsk_queue_drop(sk, req);
inet_csk_reqsk_queue_add(sk, req, child);
+ /* Warning: caller must not call reqsk_put(req);
+ * child stole last reference on it.
+ */
return child;
listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c8d7e06b72f..a369e8a70b2c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2812,39 +2812,65 @@ begin_fwd:
}
}
-/* Send a fin. The caller locks the socket for us. This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
+/* We allow to exceed memory limits for FIN packets to expedite
+ * connection tear down and (memory) recovery.
+ * Otherwise tcp_send_fin() could be tempted to either delay FIN
+ * or even be forced to close flow without any FIN.
+ */
+static void sk_forced_wmem_schedule(struct sock *sk, int size)
+{
+ int amt, status;
+
+ if (size <= sk->sk_forward_alloc)
+ return;
+ amt = sk_mem_pages(size);
+ sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+ sk_memory_allocated_add(sk, amt, &status);
+}
+
+/* Send a FIN. The caller locks the socket for us.
+ * We should try to send a FIN packet really hard, but eventually give up.
*/
void tcp_send_fin(struct sock *sk)
{
+ struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = tcp_write_queue_tail(sk);
- int mss_now;
- /* Optimization, tack on the FIN if we have a queue of
- * unsent frames. But be careful about outgoing SACKS
- * and IP options.
+ /* Optimization, tack on the FIN if we have one skb in write queue and
+ * this skb was not yet sent, or we are under memory pressure.
+ * Note: in the latter case, FIN packet will be sent after a timeout,
+ * as TCP stack thinks it has already been transmitted.
*/
- mss_now = tcp_current_mss(sk);
-
- if (tcp_send_head(sk)) {
- TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
- TCP_SKB_CB(skb)->end_seq++;
+ if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+coalesce:
+ TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
+ TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
+ if (!tcp_send_head(sk)) {
+ /* This means tskb was already sent.
+ * Pretend we included the FIN on previous transmit.
+ * We need to set tp->snd_nxt to the value it would have
+ * if FIN had been sent. This is because retransmit path
+ * does not change tp->snd_nxt.
+ */
+ tp->snd_nxt++;
+ return;
+ }
} else {
- /* Socket is locked, keep trying until memory is available. */
- for (;;) {
- skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
- if (skb)
- break;
- yield();
+ skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+ if (unlikely(!skb)) {
+ if (tskb)
+ goto coalesce;
+ return;
}
+ skb_reserve(skb, MAX_TCP_HEADER);
+ sk_forced_wmem_schedule(sk, skb->truesize);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
tcp_init_nondata_skb(skb, tp->write_seq,
TCPHDR_ACK | TCPHDR_FIN);
tcp_queue_skb(sk, skb);
}
- __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
+ __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
}
/* We get here when a process closes a file descriptor (either due to